1.Objetivo do desafio

O objetivo do desafio House prices é prever o valor de venda de um imóvel de acordo com as 79 variáveis do banco de dados. O desafio desponibiliza 4 arquivos, um dataset de treino e um dataset de teste ambos em csv, um arquivo txt com a descrição das colunas do dataset treino e um arquivo csv exemplo de como o resultado deve ser submetido. Para acessar mais informações sobre o desafio e fazer download dos dados basta acessar o link.

2. Lendo os arquivos.

# Carregando o tidyverse

library(tidyverse)
── Attaching core tidyverse packages ──────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.0     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.1.8
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     ── Conflicts ────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
# Setando o diretório de trabalho
setwd(dir = "C:/Users/rodolfo.paula/Desktop/PESSOAL_RODOLFO/Better Decisions/scripts/kaggle/house_prices")

# Lendo os arquivos de treino e de teste
train = data.frame(read.csv("train.csv"))
test = data.frame(read.csv("test.csv"))

# Visualizando as dimensões dos arquivos
dim(train)
[1] 1460   81
dim(test)
[1] 1459   80
# Visualizando o dataset de treino 
glimpse(train)
Rows: 1,460
Columns: 81
$ Id            <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
$ MSSubClass    <int> 60, 20, 60, 70, 60, 50, 20, 60, 50, 190, 20, 60,…
$ MSZoning      <chr> "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ LotFrontage   <int> 65, 80, 68, 60, 84, 85, 75, NA, 51, 50, 70, 85, …
$ LotArea       <int> 8450, 9600, 11250, 9550, 14260, 14115, 10084, 10…
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LotShape      <chr> "Reg", "Reg", "IR1", "IR1", "IR1", "IR1", "Reg",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "FR2", "Inside", "Corner", "FR2", "Ins…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "CollgCr", "Veenker", "CollgCr", "Crawfor", "NoR…
$ Condition1    <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", …
$ HouseStyle    <chr> "2Story", "1Story", "2Story", "2Story", "2Story"…
$ OverallQual   <int> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6, 7, …
$ OverallCond   <int> 5, 8, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 6, 5, 5, 8, …
$ YearBuilt     <int> 2003, 1976, 2001, 1915, 2000, 1993, 2004, 1973, …
$ YearRemodAdd  <int> 2003, 1976, 2002, 1970, 2000, 1995, 2005, 1973, …
$ RoofStyle     <chr> "Gable", "Gable", "Gable", "Gable", "Gable", "Ga…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Sdng", "Vin…
$ Exterior2nd   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Shng", "Vin…
$ MasVnrType    <chr> "BrkFace", "None", "BrkFace", "None", "BrkFace",…
$ MasVnrArea    <int> 196, 0, 162, 0, 350, 0, 186, 240, 0, 0, 0, 286, …
$ ExterQual     <chr> "Gd", "TA", "Gd", "TA", "Gd", "TA", "Gd", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ Foundation    <chr> "PConc", "CBlock", "PConc", "BrkTil", "PConc", "…
$ BsmtQual      <chr> "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Ex", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "Gd", "Mn", "No", "Av", "No", "Av", "Mn", …
$ BsmtFinType1  <chr> "GLQ", "ALQ", "GLQ", "ALQ", "GLQ", "GLQ", "GLQ",…
$ BsmtFinSF1    <int> 706, 978, 486, 216, 655, 732, 1369, 859, 0, 851,…
$ BsmtFinType2  <chr> "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ BsmtFinSF2    <int> 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0,…
$ BsmtUnfSF     <int> 150, 284, 434, 540, 490, 64, 317, 216, 952, 140,…
$ TotalBsmtSF   <int> 856, 1262, 920, 756, 1145, 796, 1686, 1107, 952,…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ X1stFlrSF     <int> 856, 1262, 920, 961, 1145, 796, 1694, 1107, 1022…
$ X2ndFlrSF     <int> 854, 0, 866, 756, 1053, 566, 0, 983, 752, 0, 0, …
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 1710, 1262, 1786, 1717, 2198, 1362, 1694, 2090, …
$ BsmtFullBath  <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, …
$ BsmtHalfBath  <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, …
$ HalfBath      <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, …
$ BedroomAbvGr  <int> 3, 3, 3, 3, 4, 1, 3, 3, 2, 2, 3, 4, 2, 3, 2, 2, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, …
$ KitchenQual   <chr> "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "TA", …
$ TotRmsAbvGrd  <int> 8, 6, 6, 7, 9, 5, 7, 7, 8, 5, 5, 11, 4, 7, 5, 5,…
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ Fireplaces    <int> 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, …
$ FireplaceQu   <chr> NA, "TA", "TA", "Gd", "TA", NA, "Gd", "TA", "TA"…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Detchd", "Attchd"…
$ GarageYrBlt   <int> 2003, 1976, 2001, 1998, 2000, 1993, 2004, 1973, …
$ GarageFinish  <chr> "RFn", "RFn", "RFn", "Unf", "RFn", "Unf", "RFn",…
$ GarageCars    <int> 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 3, 1, 3, 1, 2, …
$ GarageArea    <int> 548, 460, 608, 642, 836, 480, 636, 484, 468, 205…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ WoodDeckSF    <int> 0, 298, 0, 0, 192, 40, 255, 235, 90, 0, 0, 147, …
$ OpenPorchSF   <int> 61, 0, 42, 35, 84, 30, 57, 204, 0, 4, 0, 21, 0, …
$ EnclosedPorch <int> 0, 0, 0, 272, 0, 0, 0, 228, 205, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ ScreenPorch   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Fence         <chr> NA, NA, NA, NA, NA, "MnPrv", NA, NA, NA, NA, NA,…
$ MiscFeature   <chr> NA, NA, NA, NA, NA, "Shed", NA, "Shed", NA, NA, …
$ MiscVal       <int> 0, 0, 0, 0, 0, 700, 0, 350, 0, 0, 0, 0, 0, 0, 0,…
$ MoSold        <int> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8, 5, …
$ YrSold        <int> 2008, 2007, 2008, 2006, 2008, 2009, 2007, 2009, …
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Abnorml", "Normal…
$ SalePrice     <int> 208500, 181500, 223500, 140000, 250000, 143000, …
# Obtendo um sumário dos dados de treino

summary(train)
       Id           MSSubClass      MSZoning          LotFrontage    
 Min.   :   1.0   Min.   : 20.0   Length:1460        Min.   : 21.00  
 1st Qu.: 365.8   1st Qu.: 20.0   Class :character   1st Qu.: 59.00  
 Median : 730.5   Median : 50.0   Mode  :character   Median : 69.00  
 Mean   : 730.5   Mean   : 56.9                      Mean   : 70.05  
 3rd Qu.:1095.2   3rd Qu.: 70.0                      3rd Qu.: 80.00  
 Max.   :1460.0   Max.   :190.0                      Max.   :313.00  
                                                     NA's   :259     
    LotArea          Street             Alley          
 Min.   :  1300   Length:1460        Length:1460       
 1st Qu.:  7554   Class :character   Class :character  
 Median :  9478   Mode  :character   Mode  :character  
 Mean   : 10517                                        
 3rd Qu.: 11602                                        
 Max.   :215245                                        
                                                       
   LotShape         LandContour         Utilities        
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
  LotConfig          LandSlope         Neighborhood      
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
  Condition1         Condition2          BldgType        
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
  HouseStyle         OverallQual      OverallCond      YearBuilt   
 Length:1460        Min.   : 1.000   Min.   :1.000   Min.   :1872  
 Class :character   1st Qu.: 5.000   1st Qu.:5.000   1st Qu.:1954  
 Mode  :character   Median : 6.000   Median :5.000   Median :1973  
                    Mean   : 6.099   Mean   :5.575   Mean   :1971  
                    3rd Qu.: 7.000   3rd Qu.:6.000   3rd Qu.:2000  
                    Max.   :10.000   Max.   :9.000   Max.   :2010  
                                                                   
  YearRemodAdd   RoofStyle           RoofMatl        
 Min.   :1950   Length:1460        Length:1460       
 1st Qu.:1967   Class :character   Class :character  
 Median :1994   Mode  :character   Mode  :character  
 Mean   :1985                                        
 3rd Qu.:2004                                        
 Max.   :2010                                        
                                                     
 Exterior1st        Exterior2nd         MasVnrType       
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
   MasVnrArea      ExterQual          ExterCond        
 Min.   :   0.0   Length:1460        Length:1460       
 1st Qu.:   0.0   Class :character   Class :character  
 Median :   0.0   Mode  :character   Mode  :character  
 Mean   : 103.7                                        
 3rd Qu.: 166.0                                        
 Max.   :1600.0                                        
 NA's   :8                                             
  Foundation          BsmtQual           BsmtCond        
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
 BsmtExposure       BsmtFinType1         BsmtFinSF1    
 Length:1460        Length:1460        Min.   :   0.0  
 Class :character   Class :character   1st Qu.:   0.0  
 Mode  :character   Mode  :character   Median : 383.5  
                                       Mean   : 443.6  
                                       3rd Qu.: 712.2  
                                       Max.   :5644.0  
                                                       
 BsmtFinType2         BsmtFinSF2        BsmtUnfSF       TotalBsmtSF    
 Length:1460        Min.   :   0.00   Min.   :   0.0   Min.   :   0.0  
 Class :character   1st Qu.:   0.00   1st Qu.: 223.0   1st Qu.: 795.8  
 Mode  :character   Median :   0.00   Median : 477.5   Median : 991.5  
                    Mean   :  46.55   Mean   : 567.2   Mean   :1057.4  
                    3rd Qu.:   0.00   3rd Qu.: 808.0   3rd Qu.:1298.2  
                    Max.   :1474.00   Max.   :2336.0   Max.   :6110.0  
                                                                       
   Heating           HeatingQC          CentralAir       
 Length:1460        Length:1460        Length:1460       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
  Electrical          X1stFlrSF      X2ndFlrSF     LowQualFinSF    
 Length:1460        Min.   : 334   Min.   :   0   Min.   :  0.000  
 Class :character   1st Qu.: 882   1st Qu.:   0   1st Qu.:  0.000  
 Mode  :character   Median :1087   Median :   0   Median :  0.000  
                    Mean   :1163   Mean   : 347   Mean   :  5.845  
                    3rd Qu.:1391   3rd Qu.: 728   3rd Qu.:  0.000  
                    Max.   :4692   Max.   :2065   Max.   :572.000  
                                                                   
   GrLivArea     BsmtFullBath     BsmtHalfBath        FullBath    
 Min.   : 334   Min.   :0.0000   Min.   :0.00000   Min.   :0.000  
 1st Qu.:1130   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:1.000  
 Median :1464   Median :0.0000   Median :0.00000   Median :2.000  
 Mean   :1515   Mean   :0.4253   Mean   :0.05753   Mean   :1.565  
 3rd Qu.:1777   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:2.000  
 Max.   :5642   Max.   :3.0000   Max.   :2.00000   Max.   :3.000  
                                                                  
    HalfBath       BedroomAbvGr    KitchenAbvGr   KitchenQual       
 Min.   :0.0000   Min.   :0.000   Min.   :0.000   Length:1460       
 1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:1.000   Class :character  
 Median :0.0000   Median :3.000   Median :1.000   Mode  :character  
 Mean   :0.3829   Mean   :2.866   Mean   :1.047                     
 3rd Qu.:1.0000   3rd Qu.:3.000   3rd Qu.:1.000                     
 Max.   :2.0000   Max.   :8.000   Max.   :3.000                     
                                                                    
  TotRmsAbvGrd     Functional          Fireplaces    FireplaceQu       
 Min.   : 2.000   Length:1460        Min.   :0.000   Length:1460       
 1st Qu.: 5.000   Class :character   1st Qu.:0.000   Class :character  
 Median : 6.000   Mode  :character   Median :1.000   Mode  :character  
 Mean   : 6.518                      Mean   :0.613                     
 3rd Qu.: 7.000                      3rd Qu.:1.000                     
 Max.   :14.000                      Max.   :3.000                     
                                                                       
  GarageType         GarageYrBlt   GarageFinish         GarageCars   
 Length:1460        Min.   :1900   Length:1460        Min.   :0.000  
 Class :character   1st Qu.:1961   Class :character   1st Qu.:1.000  
 Mode  :character   Median :1980   Mode  :character   Median :2.000  
                    Mean   :1979                      Mean   :1.767  
                    3rd Qu.:2002                      3rd Qu.:2.000  
                    Max.   :2010                      Max.   :4.000  
                    NA's   :81                                       
   GarageArea      GarageQual         GarageCond       
 Min.   :   0.0   Length:1460        Length:1460       
 1st Qu.: 334.5   Class :character   Class :character  
 Median : 480.0   Mode  :character   Mode  :character  
 Mean   : 473.0                                        
 3rd Qu.: 576.0                                        
 Max.   :1418.0                                        
                                                       
  PavedDrive          WoodDeckSF      OpenPorchSF     EnclosedPorch   
 Length:1460        Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
 Class :character   1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.00  
 Mode  :character   Median :  0.00   Median : 25.00   Median :  0.00  
                    Mean   : 94.24   Mean   : 46.66   Mean   : 21.95  
                    3rd Qu.:168.00   3rd Qu.: 68.00   3rd Qu.:  0.00  
                    Max.   :857.00   Max.   :547.00   Max.   :552.00  
                                                                      
   X3SsnPorch      ScreenPorch        PoolArea          PoolQC         
 Min.   :  0.00   Min.   :  0.00   Min.   :  0.000   Length:1460       
 1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.000   Class :character  
 Median :  0.00   Median :  0.00   Median :  0.000   Mode  :character  
 Mean   :  3.41   Mean   : 15.06   Mean   :  2.759                     
 3rd Qu.:  0.00   3rd Qu.:  0.00   3rd Qu.:  0.000                     
 Max.   :508.00   Max.   :480.00   Max.   :738.000                     
                                                                       
    Fence           MiscFeature           MiscVal        
 Length:1460        Length:1460        Min.   :    0.00  
 Class :character   Class :character   1st Qu.:    0.00  
 Mode  :character   Mode  :character   Median :    0.00  
                                       Mean   :   43.49  
                                       3rd Qu.:    0.00  
                                       Max.   :15500.00  
                                                         
     MoSold           YrSold       SaleType         SaleCondition     
 Min.   : 1.000   Min.   :2006   Length:1460        Length:1460       
 1st Qu.: 5.000   1st Qu.:2007   Class :character   Class :character  
 Median : 6.000   Median :2008   Mode  :character   Mode  :character  
 Mean   : 6.322   Mean   :2008                                        
 3rd Qu.: 8.000   3rd Qu.:2009                                        
 Max.   :12.000   Max.   :2010                                        
                                                                      
   SalePrice     
 Min.   : 34900  
 1st Qu.:129975  
 Median :163000  
 Mean   :180921  
 3rd Qu.:214000  
 Max.   :755000  
                 

Utilizando o sumário fica claro que o banco de dados de treino possui muitas variáveis. Vamos incialmente separar o banco de dados em variáveis qualitativas e variáveis quantitativas, assim se torna mais facil o tratamento e limpeza dos dados.

Para as variáveis qualitativas que apresentem valores faltantes ou NA`s vamos substituir pela moda daquela variável e para as variáveis quantitativas de que apresentem valores faltantes vamos substituir pela média.

A coluna Id será eliminada por ser um valor sequencial que nao representa nehuma informação útil como variável explicativa capaz de influenciar no valor predito do preço de venda de uma casa.

Para separar o dataset em dados quantitativos e qualitativos o melhor caminho é usara a função select_if() do pacote dplyr


# Separando o dataframe em train quanti e train quali usando a função select_if

train_quanti <- select_if(train, is.numeric)
train_quali <- select_if(train, is.character)

3. Limpesa de dados Qualitativos

#Visualizando os dados 
glimpse(train_quali)
Rows: 1,460
Columns: 43
$ MSZoning      <chr> "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LotShape      <chr> "Reg", "Reg", "IR1", "IR1", "IR1", "IR1", "Reg",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "FR2", "Inside", "Corner", "FR2", "Ins…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "CollgCr", "Veenker", "CollgCr", "Crawfor", "NoR…
$ Condition1    <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", …
$ HouseStyle    <chr> "2Story", "1Story", "2Story", "2Story", "2Story"…
$ RoofStyle     <chr> "Gable", "Gable", "Gable", "Gable", "Gable", "Ga…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Sdng", "Vin…
$ Exterior2nd   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Shng", "Vin…
$ MasVnrType    <chr> "BrkFace", "None", "BrkFace", "None", "BrkFace",…
$ ExterQual     <chr> "Gd", "TA", "Gd", "TA", "Gd", "TA", "Gd", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ Foundation    <chr> "PConc", "CBlock", "PConc", "BrkTil", "PConc", "…
$ BsmtQual      <chr> "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Ex", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "Gd", "Mn", "No", "Av", "No", "Av", "Mn", …
$ BsmtFinType1  <chr> "GLQ", "ALQ", "GLQ", "ALQ", "GLQ", "GLQ", "GLQ",…
$ BsmtFinType2  <chr> "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ KitchenQual   <chr> "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "TA", …
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ FireplaceQu   <chr> NA, "TA", "TA", "Gd", "TA", NA, "Gd", "TA", "TA"…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Detchd", "Attchd"…
$ GarageFinish  <chr> "RFn", "RFn", "RFn", "Unf", "RFn", "Unf", "RFn",…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Fence         <chr> NA, NA, NA, NA, NA, "MnPrv", NA, NA, NA, NA, NA,…
$ MiscFeature   <chr> NA, NA, NA, NA, NA, "Shed", NA, "Shed", NA, NA, …
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Abnorml", "Normal…
# Procurando Valores missing
colSums(is.na(train_quali))
     MSZoning        Street         Alley      LotShape   LandContour 
            0             0          1369             0             0 
    Utilities     LotConfig     LandSlope  Neighborhood    Condition1 
            0             0             0             0             0 
   Condition2      BldgType    HouseStyle     RoofStyle      RoofMatl 
            0             0             0             0             0 
  Exterior1st   Exterior2nd    MasVnrType     ExterQual     ExterCond 
            0             0             8             0             0 
   Foundation      BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1 
            0            37            37            38            37 
 BsmtFinType2       Heating     HeatingQC    CentralAir    Electrical 
           38             0             0             0             1 
  KitchenQual    Functional   FireplaceQu    GarageType  GarageFinish 
            0             0           690            81            81 
   GarageQual    GarageCond    PavedDrive        PoolQC         Fence 
           81            81             0          1453          1179 
  MiscFeature      SaleType SaleCondition 
         1406             0             0 

Com essa analise a cima sabemos o seguinte sobre os nossos dados categóricos:


# Deletando as colunas que não são de interesse para o dataset. 

train_quali <-train_quali %>% select(-c(Alley, PoolQC, Fence, MiscFeature))
print(dim(train_quali))
[1] 1460   39
# Lista com as variáveis : MasVnrType,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Electrical,
# FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond
##############################################################
# Contando os valores NA`s das variáveis com missing values
unique(train_quali$MasVnrType)
[1] "BrkFace" "None"    "Stone"   "BrkCmn"  NA       
sort(table(train_quali$MasVnrType))

 BrkCmn   Stone BrkFace    None 
     15     128     445     864 
# Alterando os valores de NA para o valor da Moda de MasVnrType.
train_quali <- train_quali %>% mutate(MasVnrType=coalesce(MasVnrType,"None"))

##############################################
# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtQual)
[1] "Gd" "TA" "Ex" NA   "Fa"
sort(table(train_quali$BsmtQual))

 Fa  Ex  Gd  TA 
 35 121 618 649 
# Alterando os valores de NA para o valor da Moda de BsmtQual.
train_quali <- train_quali %>%    
  mutate(BsmtQual=coalesce(BsmtQual,"TA"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtCond)
[1] "TA" "Gd" NA   "Fa" "Po"
sort(table(train_quali$BsmtCond))

  Po   Fa   Gd   TA 
   2   45   65 1311 
# Alterando os valores de NA para o valor da Moda de BsmtCond
train_quali <- train_quali %>%    
  mutate(BsmtCond=coalesce(BsmtCond,"TA"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtExposure)
[1] "No" "Gd" "Mn" "Av" NA  
sort(table(train_quali$BsmtExposure))

 Mn  Gd  Av  No 
114 134 221 953 
# Alterando os valores de NA para o valor da Moda de BsmtExposure
train_quali <- train_quali %>%    
  mutate(BsmtExposure=coalesce(BsmtExposure,"No"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtFinType1)
[1] "GLQ" "ALQ" "Unf" "Rec" "BLQ" NA    "LwQ"
sort(table(train_quali$BsmtFinType1))

LwQ Rec BLQ ALQ GLQ Unf 
 74 133 148 220 418 430 
# Alterando os valores de NA para o valor da Moda de BsmtFinType1
train_quali <- train_quali %>%    
  mutate(BsmtFinType1=coalesce(BsmtFinType1,"Unf"))
##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$BsmtFinType2)
[1] "Unf" "BLQ" NA    "ALQ" "Rec" "LwQ" "GLQ"
sort(table(train_quali$BsmtFinType2))

 GLQ  ALQ  BLQ  LwQ  Rec  Unf 
  14   19   33   46   54 1256 
# Alterando os valores de NA para o valor da Moda de BsmtFinType2
train_quali <- train_quali %>%    
  mutate(BsmtFinType2=coalesce(BsmtFinType2,"Unf"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$Electrical)
[1] "SBrkr" "FuseF" "FuseA" "FuseP" "Mix"   NA     
sort(table(train_quali$Electrical))

  Mix FuseP FuseF FuseA SBrkr 
    1     3    27    94  1334 
# Alterando os valores de NA para o valor da Moda de Electrical
train_quali <- train_quali %>%    
  mutate(Electrical=coalesce(Electrical,"SBrkr"))


##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$FireplaceQu)
[1] NA   "TA" "Gd" "Fa" "Ex" "Po"
sort(table(train_quali$FireplaceQu))

 Po  Ex  Fa  TA  Gd 
 20  24  33 313 380 
# Alterando os valores de NA para o valor da Moda de FireplaceQu
train_quali <- train_quali %>%    
  mutate(FireplaceQu=coalesce(FireplaceQu,"Gd"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$GarageType)
[1] "Attchd"  "Detchd"  "BuiltIn" "CarPort" NA        "Basment"
[7] "2Types" 
sort(table(train_quali$GarageType))

 2Types CarPort Basment BuiltIn  Detchd  Attchd 
      6       9      19      88     387     870 
# Alterando os valores de NA para o valor da Moda de GarageType
train_quali <- train_quali %>%    
  mutate(GarageType=coalesce(GarageType,"Attchd"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$GarageFinish)
[1] "RFn" "Unf" "Fin" NA   
sort(table(train_quali$GarageFinish))

Fin RFn Unf 
352 422 605 
# Alterando os valores de NA para o valor da Moda de GarageFinish
train_quali <- train_quali %>%    
  mutate(GarageFinish=coalesce(GarageFinish,"Unf"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$GarageQual)
[1] "TA" "Fa" "Gd" NA   "Ex" "Po"
sort(table(train_quali$GarageQual))

  Ex   Po   Gd   Fa   TA 
   3    3   14   48 1311 
# Alterando os valores de NA para o valor da Moda de GarageQual
train_quali <- train_quali %>%    
  mutate(GarageQual=coalesce(GarageQual,"TA"))

##############################################

# Contando os valores NA`s das variáveis com missing values
unique(train_quali$GarageCond)
[1] "TA" "Fa" NA   "Gd" "Po" "Ex"
sort(table(train_quali$GarageCond))

  Ex   Po   Gd   Fa   TA 
   2    7    9   35 1326 
# Alterando os valores de NA para o valor da Moda de GarageCond
train_quali <- train_quali %>%    
  mutate(GarageCond=coalesce(GarageCond,"TA"))

No entanto, alterar na mão cada variável não é a solução mais inteligente, imagine um caso de um dataset com centenas ou até milhares de variáveis para serem alteradas.

O melhor a ser feito é loop do tipo for, capaz de varrer todas as variáveis, obter o valor de frequencia para cada valor e armazenar este com a moda e em seguida substituir os missing values de cada variável.

Vamos então carregar o banco de dados novamente para anular as transformações realizadas e separar em dados quantitativos e qualitativos e em seguida somar os valores NA`s das variáveis.


# Somando os valores NA`s para cada coluna
colSums(is.na(train_quali))
     MSZoning        Street      LotShape   LandContour     Utilities 
            0             0             0             0             0 
    LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
            0             0             0             0             0 
     BldgType    HouseStyle     RoofStyle      RoofMatl   Exterior1st 
            0             0             0             0             0 
  Exterior2nd    MasVnrType     ExterQual     ExterCond    Foundation 
            0             0             0             0             0 
     BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1  BsmtFinType2 
            0             0             0             0             0 
      Heating     HeatingQC    CentralAir    Electrical   KitchenQual 
            0             0             0             0             0 
   Functional   FireplaceQu    GarageType  GarageFinish    GarageQual 
            0             0             0             0             0 
   GarageCond    PavedDrive      SaleType SaleCondition 
            0             0             0             0 
# Montando um loop do tipo for 
for(item in colnames(train_quali)){
  
  x = data.frame(sort(table(train_quali[item])))
  #print(x)
  mode = tail(x, n = 1)
  #print(mode)
  moda = (mode[1,1])
  moda <- (as.character(moda))
  #print(moda)
  train_quali <- train_quali %>% 
  mutate(across(item, ~ case_when(is.na(.) ~ moda, TRUE ~ .)))
  
  
  
}
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `across(item, ~case_when(is.na(.) ~ moda, TRUE ~ .))`.
Caused by warning:
! Using an external vector in selections was deprecated in tidyselect
  1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(item)

  # Now:
  data %>% select(all_of(item))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning
was generated.
# testando o resultado
unique(train_quali$Street)
[1] "Pave" "Grvl"
table(train_quali$Street)

Grvl Pave 
   6 1454 
# Somando os valores NA`s para cada coluna para conferir os resultados 
colSums(is.na(train_quali))
     MSZoning        Street      LotShape   LandContour     Utilities 
            0             0             0             0             0 
    LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
            0             0             0             0             0 
     BldgType    HouseStyle     RoofStyle      RoofMatl   Exterior1st 
            0             0             0             0             0 
  Exterior2nd    MasVnrType     ExterQual     ExterCond    Foundation 
            0             0             0             0             0 
     BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1  BsmtFinType2 
            0             0             0             0             0 
      Heating     HeatingQC    CentralAir    Electrical   KitchenQual 
            0             0             0             0             0 
   Functional   FireplaceQu    GarageType  GarageFinish    GarageQual 
            0             0             0             0             0 
   GarageCond    PavedDrive      SaleType SaleCondition 
            0             0             0             0 

4. Limpeza de dados quantitativos

Da mesma maneira que realizamos a limpeza dos dados faltantes no banco de dados qualitativos iniciamos aqui a limpeza de dados faltantes nos dados quantitativos

# Visualizando os dados 
glimpse(train_quanti)
Rows: 1,460
Columns: 38
$ Id            <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
$ MSSubClass    <int> 60, 20, 60, 70, 60, 50, 20, 60, 50, 190, 20, 60,…
$ LotFrontage   <int> 65, 80, 68, 60, 84, 85, 75, NA, 51, 50, 70, 85, …
$ LotArea       <int> 8450, 9600, 11250, 9550, 14260, 14115, 10084, 10…
$ OverallQual   <int> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6, 7, …
$ OverallCond   <int> 5, 8, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 6, 5, 5, 8, …
$ YearBuilt     <int> 2003, 1976, 2001, 1915, 2000, 1993, 2004, 1973, …
$ YearRemodAdd  <int> 2003, 1976, 2002, 1970, 2000, 1995, 2005, 1973, …
$ MasVnrArea    <int> 196, 0, 162, 0, 350, 0, 186, 240, 0, 0, 0, 286, …
$ BsmtFinSF1    <int> 706, 978, 486, 216, 655, 732, 1369, 859, 0, 851,…
$ BsmtFinSF2    <int> 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0,…
$ BsmtUnfSF     <int> 150, 284, 434, 540, 490, 64, 317, 216, 952, 140,…
$ TotalBsmtSF   <int> 856, 1262, 920, 756, 1145, 796, 1686, 1107, 952,…
$ X1stFlrSF     <int> 856, 1262, 920, 961, 1145, 796, 1694, 1107, 1022…
$ X2ndFlrSF     <int> 854, 0, 866, 756, 1053, 566, 0, 983, 752, 0, 0, …
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 1710, 1262, 1786, 1717, 2198, 1362, 1694, 2090, …
$ BsmtFullBath  <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, …
$ BsmtHalfBath  <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, …
$ HalfBath      <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, …
$ BedroomAbvGr  <int> 3, 3, 3, 3, 4, 1, 3, 3, 2, 2, 3, 4, 2, 3, 2, 2, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, …
$ TotRmsAbvGrd  <int> 8, 6, 6, 7, 9, 5, 7, 7, 8, 5, 5, 11, 4, 7, 5, 5,…
$ Fireplaces    <int> 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, …
$ GarageYrBlt   <int> 2003, 1976, 2001, 1998, 2000, 1993, 2004, 1973, …
$ GarageCars    <int> 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 3, 1, 3, 1, 2, …
$ GarageArea    <int> 548, 460, 608, 642, 836, 480, 636, 484, 468, 205…
$ WoodDeckSF    <int> 0, 298, 0, 0, 192, 40, 255, 235, 90, 0, 0, 147, …
$ OpenPorchSF   <int> 61, 0, 42, 35, 84, 30, 57, 204, 0, 4, 0, 21, 0, …
$ EnclosedPorch <int> 0, 0, 0, 272, 0, 0, 0, 228, 205, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ ScreenPorch   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ MiscVal       <int> 0, 0, 0, 0, 0, 700, 0, 350, 0, 0, 0, 0, 0, 0, 0,…
$ MoSold        <int> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8, 5, …
$ YrSold        <int> 2008, 2007, 2008, 2006, 2008, 2009, 2007, 2009, …
$ SalePrice     <int> 208500, 181500, 223500, 140000, 250000, 143000, …
# Somando os valores faltantes
colSums(is.na(train_quanti))
           Id    MSSubClass   LotFrontage       LotArea   OverallQual 
            0             0           259             0             0 
  OverallCond     YearBuilt  YearRemodAdd    MasVnrArea    BsmtFinSF1 
            0             0             0             8             0 
   BsmtFinSF2     BsmtUnfSF   TotalBsmtSF     X1stFlrSF     X2ndFlrSF 
            0             0             0             0             0 
 LowQualFinSF     GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath 
            0             0             0             0             0 
     HalfBath  BedroomAbvGr  KitchenAbvGr  TotRmsAbvGrd    Fireplaces 
            0             0             0             0             0 
  GarageYrBlt    GarageCars    GarageArea    WoodDeckSF   OpenPorchSF 
           81             0             0             0             0 
EnclosedPorch    X3SsnPorch   ScreenPorch      PoolArea       MiscVal 
            0             0             0             0             0 
       MoSold        YrSold     SalePrice 
            0             0             0 

Com o resultado do somatório dos valores ausentes por variável do banco de dados quantitativos, observamos que apenas 3 variáveis possuem missing values. Sendo assim a melhor opção é utilizar a função mutate() e substituir os NA`s pela média entre as observações para cada variável.

# LotFrontage
train_quanti <- train_quanti %>%    
  mutate(LotFrontage=coalesce(LotFrontage,
                              mean(x =train_quanti$LotFrontage, na.rm = T )))

# MasVnrArea
train_quanti <- train_quanti %>%    
  mutate(MasVnrArea=coalesce(MasVnrArea,
                              mean(x =train_quanti$MasVnrArea, na.rm = T )))

# GarageYrBlt
train_quanti <- train_quanti %>%    
  mutate(GarageYrBlt=coalesce(GarageYrBlt,
                              mean(x =train_quanti$GarageYrBlt, na.rm = T )))

# Conferindo se ainda existe valores faltantes:
colSums(is.na(train_quanti))

5. Unindo os dataframes quali e quanti de treino

Após a limpeza dos dados de forma adequada de acordo com o tipo de dado, onde para os dados categóricos nós utilizamos o valor da moda para substituir os missing values e o valor da media para substituir os dados ausente nos dados quantitativos, podemos com segurança unir esse bancos de dados em apenas um banco de dado já tratado e pronto para a etapa de análise de dados.


# Unindo os bancos de dados

train.clean <- cbind(train_quali,train_quanti)
names(train.clean)
 [1] "MSZoning"      "Street"        "LotShape"      "LandContour"  
 [5] "Utilities"     "LotConfig"     "LandSlope"     "Neighborhood" 
 [9] "Condition1"    "Condition2"    "BldgType"      "HouseStyle"   
[13] "RoofStyle"     "RoofMatl"      "Exterior1st"   "Exterior2nd"  
[17] "MasVnrType"    "ExterQual"     "ExterCond"     "Foundation"   
[21] "BsmtQual"      "BsmtCond"      "BsmtExposure"  "BsmtFinType1" 
[25] "BsmtFinType2"  "Heating"       "HeatingQC"     "CentralAir"   
[29] "Electrical"    "KitchenQual"   "Functional"    "FireplaceQu"  
[33] "GarageType"    "GarageFinish"  "GarageQual"    "GarageCond"   
[37] "PavedDrive"    "SaleType"      "SaleCondition" "Id"           
[41] "MSSubClass"    "LotFrontage"   "LotArea"       "OverallQual"  
[45] "OverallCond"   "YearBuilt"     "YearRemodAdd"  "MasVnrArea"   
[49] "BsmtFinSF1"    "BsmtFinSF2"    "BsmtUnfSF"     "TotalBsmtSF"  
[53] "X1stFlrSF"     "X2ndFlrSF"     "LowQualFinSF"  "GrLivArea"    
[57] "BsmtFullBath"  "BsmtHalfBath"  "FullBath"      "HalfBath"     
[61] "BedroomAbvGr"  "KitchenAbvGr"  "TotRmsAbvGrd"  "Fireplaces"   
[65] "GarageYrBlt"   "GarageCars"    "GarageArea"    "WoodDeckSF"   
[69] "OpenPorchSF"   "EnclosedPorch" "X3SsnPorch"    "ScreenPorch"  
[73] "PoolArea"      "MiscVal"       "MoSold"        "YrSold"       
[77] "SalePrice"    

6. Limpando os dados de teste.

Da mesma forma que procedemos com os dados de treino iremos abordar agora os dados de teste e deixar todos os nossos bancos de dados prontos para a etapa de análise exploratória de dados. A primeira etapa será a separação em dados qualitativos e dados quantitativos, seguido da limpeza dos valores ausentes e por fim a união dos bancos quali e quanti de teste em apenas um dataframe tratado.

# Visualizando os dados de teste
glimpse(test)
Rows: 1,459
Columns: 80
$ Id            <int> 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, …
$ MSSubClass    <int> 20, 20, 60, 60, 120, 60, 20, 60, 20, 20, 120, 16…
$ MSZoning      <chr> "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ LotFrontage   <int> 80, 81, 74, 78, 43, 75, NA, 63, 85, 70, 26, 21, …
$ LotArea       <int> 11622, 14267, 13830, 9978, 5005, 10000, 7980, 84…
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LotShape      <chr> "Reg", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "Corner", "Inside", "Inside", "Inside"…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr…
$ Condition1    <chr> "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "1Fam"…
$ HouseStyle    <chr> "1Story", "1Story", "2Story", "2Story", "1Story"…
$ OverallQual   <int> 5, 6, 5, 6, 8, 6, 6, 6, 7, 4, 7, 6, 5, 6, 7, 9, …
$ OverallCond   <int> 6, 6, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 6, 6, 5, …
$ YearBuilt     <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ YearRemodAdd  <int> 1961, 1958, 1998, 1998, 1992, 1994, 2007, 1998, …
$ RoofStyle     <chr> "Gable", "Hip", "Gable", "Gable", "Gable", "Gabl…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ Exterior2nd   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ MasVnrType    <chr> "None", "BrkFace", "None", "BrkFace", "None", "N…
$ MasVnrArea    <int> 0, 108, 0, 20, 0, 0, 0, 0, 0, 0, 0, 504, 492, 0,…
$ ExterQual     <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", …
$ Foundation    <chr> "CBlock", "CBlock", "PConc", "PConc", "PConc", "…
$ BsmtQual      <chr> "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "No", "No", "No", "No", "No", "No", "No", …
$ BsmtFinType1  <chr> "Rec", "ALQ", "GLQ", "GLQ", "ALQ", "Unf", "ALQ",…
$ BsmtFinSF1    <int> 468, 923, 791, 602, 263, 0, 935, 0, 637, 804, 10…
$ BsmtFinType2  <chr> "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ BsmtFinSF2    <int> 144, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, …
$ BsmtUnfSF     <int> 270, 406, 137, 324, 1017, 763, 233, 789, 663, 0,…
$ TotalBsmtSF   <int> 882, 1329, 928, 926, 1280, 763, 1168, 789, 1300,…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "TA", "TA", "Gd", "Ex", "Ex", "Gd", "Ex", "Gd", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ X1stFlrSF     <int> 896, 1329, 928, 926, 1280, 763, 1187, 789, 1341,…
$ X2ndFlrSF     <int> 0, 0, 701, 678, 0, 892, 0, 676, 0, 0, 0, 504, 56…
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 896, 1329, 1629, 1604, 1280, 1655, 1187, 1465, 1…
$ BsmtFullBath  <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, …
$ BsmtHalfBath  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, …
$ HalfBath      <int> 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, …
$ BedroomAbvGr  <int> 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 2, 3, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ KitchenQual   <chr> "TA", "Gd", "TA", "Gd", "Gd", "TA", "TA", "TA", …
$ TotRmsAbvGrd  <int> 5, 6, 6, 7, 5, 7, 6, 7, 5, 4, 5, 5, 6, 6, 4, 10,…
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ Fireplaces    <int> 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, …
$ FireplaceQu   <chr> NA, NA, "TA", "Gd", NA, "TA", NA, "Gd", "Po", NA…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd"…
$ GarageYrBlt   <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ GarageFinish  <chr> "Unf", "Unf", "Fin", "Fin", "RFn", "Fin", "Fin",…
$ GarageCars    <int> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 3, …
$ GarageArea    <int> 730, 312, 482, 470, 506, 440, 420, 393, 506, 525…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ WoodDeckSF    <int> 140, 393, 212, 360, 0, 157, 483, 0, 192, 240, 20…
$ OpenPorchSF   <int> 0, 36, 34, 36, 82, 84, 21, 75, 0, 0, 68, 0, 0, 0…
$ EnclosedPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ ScreenPorch   <int> 120, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Fence         <chr> "MnPrv", NA, "MnPrv", NA, NA, NA, "GdPrv", NA, N…
$ MiscFeature   <chr> NA, "Gar2", NA, NA, NA, NA, "Shed", NA, NA, NA, …
$ MiscVal       <int> 0, 12500, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, …
$ MoSold        <int> 6, 6, 3, 6, 1, 4, 3, 5, 2, 4, 6, 2, 3, 6, 6, 1, …
$ YrSold        <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Normal", "Normal"…
# separando os dados de teste em quanti e quali

test_quanti <- select_if(test, is.numeric)

test_quali <- select_if(test, is.character)

# Visualizando o resultado

glimpse(test_quali)
Rows: 1,459
Columns: 43
$ MSZoning      <chr> "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LotShape      <chr> "Reg", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "Corner", "Inside", "Inside", "Inside"…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr…
$ Condition1    <chr> "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "1Fam"…
$ HouseStyle    <chr> "1Story", "1Story", "2Story", "2Story", "1Story"…
$ RoofStyle     <chr> "Gable", "Hip", "Gable", "Gable", "Gable", "Gabl…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ Exterior2nd   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ MasVnrType    <chr> "None", "BrkFace", "None", "BrkFace", "None", "N…
$ ExterQual     <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", …
$ Foundation    <chr> "CBlock", "CBlock", "PConc", "PConc", "PConc", "…
$ BsmtQual      <chr> "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "No", "No", "No", "No", "No", "No", "No", …
$ BsmtFinType1  <chr> "Rec", "ALQ", "GLQ", "GLQ", "ALQ", "Unf", "ALQ",…
$ BsmtFinType2  <chr> "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "TA", "TA", "Gd", "Ex", "Ex", "Gd", "Ex", "Gd", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ KitchenQual   <chr> "TA", "Gd", "TA", "Gd", "Gd", "TA", "TA", "TA", …
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ FireplaceQu   <chr> NA, NA, "TA", "Gd", NA, "TA", NA, "Gd", "Po", NA…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd"…
$ GarageFinish  <chr> "Unf", "Unf", "Fin", "Fin", "RFn", "Fin", "Fin",…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Fence         <chr> "MnPrv", NA, "MnPrv", NA, NA, NA, "GdPrv", NA, N…
$ MiscFeature   <chr> NA, "Gar2", NA, NA, NA, NA, "Shed", NA, NA, NA, …
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Normal", "Normal"…
glimpse(test_quanti)
Rows: 1,459
Columns: 37
$ Id            <int> 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, …
$ MSSubClass    <int> 20, 20, 60, 60, 120, 60, 20, 60, 20, 20, 120, 16…
$ LotFrontage   <int> 80, 81, 74, 78, 43, 75, NA, 63, 85, 70, 26, 21, …
$ LotArea       <int> 11622, 14267, 13830, 9978, 5005, 10000, 7980, 84…
$ OverallQual   <int> 5, 6, 5, 6, 8, 6, 6, 6, 7, 4, 7, 6, 5, 6, 7, 9, …
$ OverallCond   <int> 6, 6, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 6, 6, 5, …
$ YearBuilt     <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ YearRemodAdd  <int> 1961, 1958, 1998, 1998, 1992, 1994, 2007, 1998, …
$ MasVnrArea    <int> 0, 108, 0, 20, 0, 0, 0, 0, 0, 0, 0, 504, 492, 0,…
$ BsmtFinSF1    <int> 468, 923, 791, 602, 263, 0, 935, 0, 637, 804, 10…
$ BsmtFinSF2    <int> 144, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, …
$ BsmtUnfSF     <int> 270, 406, 137, 324, 1017, 763, 233, 789, 663, 0,…
$ TotalBsmtSF   <int> 882, 1329, 928, 926, 1280, 763, 1168, 789, 1300,…
$ X1stFlrSF     <int> 896, 1329, 928, 926, 1280, 763, 1187, 789, 1341,…
$ X2ndFlrSF     <int> 0, 0, 701, 678, 0, 892, 0, 676, 0, 0, 0, 504, 56…
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 896, 1329, 1629, 1604, 1280, 1655, 1187, 1465, 1…
$ BsmtFullBath  <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, …
$ BsmtHalfBath  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, …
$ HalfBath      <int> 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, …
$ BedroomAbvGr  <int> 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 2, 3, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ TotRmsAbvGrd  <int> 5, 6, 6, 7, 5, 7, 6, 7, 5, 4, 5, 5, 6, 6, 4, 10,…
$ Fireplaces    <int> 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, …
$ GarageYrBlt   <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ GarageCars    <int> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 3, …
$ GarageArea    <int> 730, 312, 482, 470, 506, 440, 420, 393, 506, 525…
$ WoodDeckSF    <int> 140, 393, 212, 360, 0, 157, 483, 0, 192, 240, 20…
$ OpenPorchSF   <int> 0, 36, 34, 36, 82, 84, 21, 75, 0, 0, 68, 0, 0, 0…
$ EnclosedPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ ScreenPorch   <int> 120, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ MiscVal       <int> 0, 12500, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, …
$ MoSold        <int> 6, 6, 3, 6, 1, 4, 3, 5, 2, 4, 6, 2, 3, 6, 6, 1, …
$ YrSold        <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
# Tratando os dados qualitativos

colSums(is.na(test_quali))
     MSZoning        Street         Alley      LotShape   LandContour 
            4             0          1352             0             0 
    Utilities     LotConfig     LandSlope  Neighborhood    Condition1 
            2             0             0             0             0 
   Condition2      BldgType    HouseStyle     RoofStyle      RoofMatl 
            0             0             0             0             0 
  Exterior1st   Exterior2nd    MasVnrType     ExterQual     ExterCond 
            1             1            16             0             0 
   Foundation      BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1 
            0            44            45            44            42 
 BsmtFinType2       Heating     HeatingQC    CentralAir    Electrical 
           42             0             0             0             0 
  KitchenQual    Functional   FireplaceQu    GarageType  GarageFinish 
            1             2           730            76            78 
   GarageQual    GarageCond    PavedDrive        PoolQC         Fence 
           78            78             0          1456          1169 
  MiscFeature      SaleType SaleCondition 
         1408             1             0 
# Observe que temos os mesmos problemas, existem variável que são praticamente nulas, sendo
# a melhor escolha deletar elas do nosso banco de dados. 

test_quali <- test_quali %>% select(-c(Alley, PoolQC, Fence, MiscFeature))
glimpse(test_quali)
Rows: 1,459
Columns: 39
$ MSZoning      <chr> "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
$ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", …
$ LotShape      <chr> "Reg", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1",…
$ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl",…
$ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub"…
$ LotConfig     <chr> "Inside", "Corner", "Inside", "Inside", "Inside"…
$ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl",…
$ Neighborhood  <chr> "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr…
$ Condition1    <chr> "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm",…
$ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", …
$ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "1Fam"…
$ HouseStyle    <chr> "1Story", "1Story", "2Story", "2Story", "1Story"…
$ RoofStyle     <chr> "Gable", "Hip", "Gable", "Gable", "Gable", "Gabl…
$ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "Com…
$ Exterior1st   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ Exterior2nd   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdB…
$ MasVnrType    <chr> "None", "BrkFace", "None", "BrkFace", "None", "N…
$ ExterQual     <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", …
$ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", …
$ Foundation    <chr> "CBlock", "CBlock", "PConc", "PConc", "PConc", "…
$ BsmtQual      <chr> "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", …
$ BsmtCond      <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ BsmtExposure  <chr> "No", "No", "No", "No", "No", "No", "No", "No", …
$ BsmtFinType1  <chr> "Rec", "ALQ", "GLQ", "GLQ", "ALQ", "Unf", "ALQ",…
$ BsmtFinType2  <chr> "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf",…
$ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", …
$ HeatingQC     <chr> "TA", "TA", "Gd", "Ex", "Ex", "Gd", "Ex", "Gd", …
$ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB…
$ KitchenQual   <chr> "TA", "Gd", "TA", "Gd", "Gd", "TA", "TA", "TA", …
$ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ",…
$ FireplaceQu   <chr> NA, NA, "TA", "Gd", NA, "TA", NA, "Gd", "Po", NA…
$ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd"…
$ GarageFinish  <chr> "Unf", "Unf", "Fin", "Fin", "RFn", "Fin", "Fin",…
$ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
$ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"…
$ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", …
$ SaleCondition <chr> "Normal", "Normal", "Normal", "Normal", "Normal"…
# Aplicando o mesmo loop for para tratar os dado de teste qualitativos.

for(item in colnames(test_quali)){
  #print(item) 
  x = data.frame(sort(table(test_quali[item])))
  #print(x)
  mode = tail(x, n = 1)
  #print(mode)
  moda = (mode[1,1])
  moda <- as.character(moda)
  #print(train_quali[item])
  test_quali <- test_quali %>% 
  mutate(across(item, ~ case_when(is.na(.) ~ moda, TRUE ~ .)))
}

# Conferindo a soma dos valores nulos nas colunas após a transformção 
unique(test_quali$MasVnrType)
[1] "None"    "BrkFace" "Stone"   "BrkCmn" 
table(test_quali$MasVnrType)

 BrkCmn BrkFace    None   Stone 
     10     434     894     121 
colSums(is.na(test_quali))
     MSZoning        Street      LotShape   LandContour     Utilities 
            0             0             0             0             0 
    LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
            0             0             0             0             0 
     BldgType    HouseStyle     RoofStyle      RoofMatl   Exterior1st 
            0             0             0             0             0 
  Exterior2nd    MasVnrType     ExterQual     ExterCond    Foundation 
            0             0             0             0             0 
     BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1  BsmtFinType2 
            0             0             0             0             0 
      Heating     HeatingQC    CentralAir    Electrical   KitchenQual 
            0             0             0             0             0 
   Functional   FireplaceQu    GarageType  GarageFinish    GarageQual 
            0             0             0             0             0 
   GarageCond    PavedDrive      SaleType SaleCondition 
            0             0             0             0 
#############################

# Tratando os dados quantitativos 

# visuaizando os dados
glimpse(test_quanti)
Rows: 1,459
Columns: 37
$ Id            <int> 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, …
$ MSSubClass    <int> 20, 20, 60, 60, 120, 60, 20, 60, 20, 20, 120, 16…
$ LotFrontage   <int> 80, 81, 74, 78, 43, 75, NA, 63, 85, 70, 26, 21, …
$ LotArea       <int> 11622, 14267, 13830, 9978, 5005, 10000, 7980, 84…
$ OverallQual   <int> 5, 6, 5, 6, 8, 6, 6, 6, 7, 4, 7, 6, 5, 6, 7, 9, …
$ OverallCond   <int> 6, 6, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 6, 6, 5, …
$ YearBuilt     <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ YearRemodAdd  <int> 1961, 1958, 1998, 1998, 1992, 1994, 2007, 1998, …
$ MasVnrArea    <int> 0, 108, 0, 20, 0, 0, 0, 0, 0, 0, 0, 504, 492, 0,…
$ BsmtFinSF1    <int> 468, 923, 791, 602, 263, 0, 935, 0, 637, 804, 10…
$ BsmtFinSF2    <int> 144, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, …
$ BsmtUnfSF     <int> 270, 406, 137, 324, 1017, 763, 233, 789, 663, 0,…
$ TotalBsmtSF   <int> 882, 1329, 928, 926, 1280, 763, 1168, 789, 1300,…
$ X1stFlrSF     <int> 896, 1329, 928, 926, 1280, 763, 1187, 789, 1341,…
$ X2ndFlrSF     <int> 0, 0, 701, 678, 0, 892, 0, 676, 0, 0, 0, 504, 56…
$ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ GrLivArea     <int> 896, 1329, 1629, 1604, 1280, 1655, 1187, 1465, 1…
$ BsmtFullBath  <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, …
$ BsmtHalfBath  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ FullBath      <int> 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, …
$ HalfBath      <int> 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, …
$ BedroomAbvGr  <int> 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 2, 3, …
$ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ TotRmsAbvGrd  <int> 5, 6, 6, 7, 5, 7, 6, 7, 5, 4, 5, 5, 6, 6, 4, 10,…
$ Fireplaces    <int> 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, …
$ GarageYrBlt   <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, …
$ GarageCars    <int> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 3, …
$ GarageArea    <int> 730, 312, 482, 470, 506, 440, 420, 393, 506, 525…
$ WoodDeckSF    <int> 140, 393, 212, 360, 0, 157, 483, 0, 192, 240, 20…
$ OpenPorchSF   <int> 0, 36, 34, 36, 82, 84, 21, 75, 0, 0, 68, 0, 0, 0…
$ EnclosedPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ X3SsnPorch    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ ScreenPorch   <int> 120, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ MiscVal       <int> 0, 12500, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, …
$ MoSold        <int> 6, 6, 3, 6, 1, 4, 3, 5, 2, 4, 6, 2, 3, 6, 6, 1, …
$ YrSold        <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
# Conferindo os valores ausentes

colSums(is.na(test_quanti))
           Id    MSSubClass   LotFrontage       LotArea   OverallQual 
            0             0           227             0             0 
  OverallCond     YearBuilt  YearRemodAdd    MasVnrArea    BsmtFinSF1 
            0             0             0            15             1 
   BsmtFinSF2     BsmtUnfSF   TotalBsmtSF     X1stFlrSF     X2ndFlrSF 
            1             1             1             0             0 
 LowQualFinSF     GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath 
            0             0             2             2             0 
     HalfBath  BedroomAbvGr  KitchenAbvGr  TotRmsAbvGrd    Fireplaces 
            0             0             0             0             0 
  GarageYrBlt    GarageCars    GarageArea    WoodDeckSF   OpenPorchSF 
           78             1             1             0             0 
EnclosedPorch    X3SsnPorch   ScreenPorch      PoolArea       MiscVal 
            0             0             0             0             0 
       MoSold        YrSold 
            0             0 
# Ao todo temos 8 variaveis que apresentam valores ausentes.
test_quanti <- test_quanti %>%    
  mutate(LotFrontage=coalesce(LotFrontage,
                              mean(x =test_quanti$LotFrontage, na.rm = T )))
test_quanti <- test_quanti %>%    
  mutate(MasVnrArea=coalesce(MasVnrArea,
                              mean(x =test_quanti$MasVnrArea, na.rm = T )))
test_quanti <- test_quanti %>%    
  mutate(BsmtFinSF1 =coalesce(BsmtFinSF1 ,
                              mean(x =test_quanti$BsmtFinSF1 , na.rm = T )))

test_quanti <- test_quanti %>%    
  mutate(BsmtFinSF2 =coalesce(BsmtFinSF2 ,
                              mean(x =test_quanti$BsmtFinSF2 , na.rm = T )))

test_quanti <- test_quanti %>%    
  mutate(BsmtUnfSF =coalesce(BsmtUnfSF ,
                              mean(x =test_quanti$BsmtUnfSF , na.rm = T )))


test_quanti <- test_quanti %>%    
  mutate(TotalBsmtSF =coalesce(TotalBsmtSF ,
                              mean(x =test_quanti$TotalBsmtSF , na.rm = T )))

test_quanti <- test_quanti %>%    
  mutate(BsmtFullBath =coalesce(BsmtFullBath ,
                              mean(x =test_quanti$BsmtFullBath , na.rm = T )))

test_quanti <- test_quanti %>%    
  mutate(BsmtHalfBath =coalesce(BsmtHalfBath ,
                              mean(x =test_quanti$BsmtHalfBath , na.rm = T )))


test_quanti <- test_quanti %>%    
  mutate(GarageYrBlt =coalesce(GarageYrBlt ,
                              mean(x =test_quanti$GarageYrBlt , na.rm = T )))


test_quanti <- test_quanti %>%    
  mutate(GarageCars =coalesce(GarageCars ,
                              mean(x =test_quanti$GarageCars , na.rm = T )))


test_quanti <- test_quanti %>%    
  mutate(GarageArea =coalesce(GarageArea ,
                              mean(x =test_quanti$GarageArea , na.rm = T )))

colSums(is.na(test_quanti))
           Id    MSSubClass   LotFrontage       LotArea   OverallQual 
            0             0             0             0             0 
  OverallCond     YearBuilt  YearRemodAdd    MasVnrArea    BsmtFinSF1 
            0             0             0             0             0 
   BsmtFinSF2     BsmtUnfSF   TotalBsmtSF     X1stFlrSF     X2ndFlrSF 
            0             0             0             0             0 
 LowQualFinSF     GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath 
            0             0             0             0             0 
     HalfBath  BedroomAbvGr  KitchenAbvGr  TotRmsAbvGrd    Fireplaces 
            0             0             0             0             0 
  GarageYrBlt    GarageCars    GarageArea    WoodDeckSF   OpenPorchSF 
            0             0             0             0             0 
EnclosedPorch    X3SsnPorch   ScreenPorch      PoolArea       MiscVal 
            0             0             0             0             0 
       MoSold        YrSold 
            0             0 
# Juntando os dataframes

test.clean <- cbind(test_quali,test_quanti)

Por fim, cabe agora conferir os dados de treino e teste tratados


colSums(is.na(train.clean))
colSums(is.na(test.clean))

7. Análise Exploratória de Dados.

Nesta etapa iniciaremos uma analise descritiva tanto das variáveis qualitativas como quantitativas do banco de dados de treino. Para os dados quantitativos abordaremos em primeira instancia uma investigação por estatistica decritiva univariada e depois bivariada para entender a sua relação com a variavel Y do problema que é a SalesPrice.

7.1 Estatistica Descritiva para dados Qualitativos.

A estatistica univariada para contempla tabelas de ferequencia de ocorrencia o que já vimos anteriormente ao obtermos a moda, representação gráfica da distribuição e medidas de localização, dispersão ou variabilidade e medidas de forma(assimetria e curtosis).

A tabela de distribuição de frequências é calculada para cada valor discreto da variável. A frequência pode ser absoluta que informa o número de ocorrências de cada elemento i na amostra, pode ser uma frequência relativa que fornece a porcentagem % relativa à frequência absoluta, pode ser uma frequência acumulada que representa a soma de todos os elementos e por fim pode ser uma frequência relativa acumulada que é a frequencia relativa à acumulada.

library(plotly)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout

Desta maneira criamos uma forma eficiente de exploramos o banco de dados para as variáveis categóricas. Como temos 39 variáveis exploratórias o melhor a fazer é criar um função que nos retorne essas três informações, a tabela de frequência, o grafico de barras verticais e grafico de barras empilhadas.

Então vamos colocar a mão na massa!!

resultado_HouseStyle
[[1]]

[[2]]

[[3]]
NA

8. Análise exploratória dos dados quantitativos

A análise exploratória de dados quantitativos os principais gráficos que irão trazer informações importantes são de linha, dispersão, histograma e box plot.

Vamos então analisar o nosso banco de dados de treino que contem as variáveis quantitativas, e além de fazer essas visualizações também iremos inserir os gráficos de correlação e heatmap.

A nossa variável Y SalePrice, ou seja a que queremos prever, é qualitativa então vamos começar a análise por ela e sugerir posteriores correlações com as outras variáveis.

Usaremos o pacote e1071 para obtermos os gráficos de densidade e análisarmos as assimetrias da distribuição de algumas Variáveis.

# Olhando os nomes das variáveis esse bancod de dados quantitativos.
names(train_quanti)
 [1] "Id"            "MSSubClass"    "LotFrontage"   "LotArea"       "OverallQual"  
 [6] "OverallCond"   "YearBuilt"     "YearRemodAdd"  "MasVnrArea"    "BsmtFinSF1"   
[11] "BsmtFinSF2"    "BsmtUnfSF"     "TotalBsmtSF"   "X1stFlrSF"     "X2ndFlrSF"    
[16] "LowQualFinSF"  "GrLivArea"     "BsmtFullBath"  "BsmtHalfBath"  "FullBath"     
[21] "HalfBath"      "BedroomAbvGr"  "KitchenAbvGr"  "TotRmsAbvGrd"  "Fireplaces"   
[26] "GarageYrBlt"   "GarageCars"    "GarageArea"    "WoodDeckSF"    "OpenPorchSF"  
[31] "EnclosedPorch" "X3SsnPorch"    "ScreenPorch"   "PoolArea"      "MiscVal"      
[36] "MoSold"        "YrSold"        "SalePrice"    
# Extraindo o sumário estatistico

summary(train_quanti)
       Id           MSSubClass     LotFrontage        LotArea        OverallQual    
 Min.   :   1.0   Min.   : 20.0   Min.   : 21.00   Min.   :  1300   Min.   : 1.000  
 1st Qu.: 365.8   1st Qu.: 20.0   1st Qu.: 60.00   1st Qu.:  7554   1st Qu.: 5.000  
 Median : 730.5   Median : 50.0   Median : 70.05   Median :  9478   Median : 6.000  
 Mean   : 730.5   Mean   : 56.9   Mean   : 70.05   Mean   : 10517   Mean   : 6.099  
 3rd Qu.:1095.2   3rd Qu.: 70.0   3rd Qu.: 79.00   3rd Qu.: 11602   3rd Qu.: 7.000  
 Max.   :1460.0   Max.   :190.0   Max.   :313.00   Max.   :215245   Max.   :10.000  
  OverallCond      YearBuilt     YearRemodAdd    MasVnrArea       BsmtFinSF1    
 Min.   :1.000   Min.   :1872   Min.   :1950   Min.   :   0.0   Min.   :   0.0  
 1st Qu.:5.000   1st Qu.:1954   1st Qu.:1967   1st Qu.:   0.0   1st Qu.:   0.0  
 Median :5.000   Median :1973   Median :1994   Median :   0.0   Median : 383.5  
 Mean   :5.575   Mean   :1971   Mean   :1985   Mean   : 103.7   Mean   : 443.6  
 3rd Qu.:6.000   3rd Qu.:2000   3rd Qu.:2004   3rd Qu.: 164.2   3rd Qu.: 712.2  
 Max.   :9.000   Max.   :2010   Max.   :2010   Max.   :1600.0   Max.   :5644.0  
   BsmtFinSF2        BsmtUnfSF       TotalBsmtSF       X1stFlrSF      X2ndFlrSF   
 Min.   :   0.00   Min.   :   0.0   Min.   :   0.0   Min.   : 334   Min.   :   0  
 1st Qu.:   0.00   1st Qu.: 223.0   1st Qu.: 795.8   1st Qu.: 882   1st Qu.:   0  
 Median :   0.00   Median : 477.5   Median : 991.5   Median :1087   Median :   0  
 Mean   :  46.55   Mean   : 567.2   Mean   :1057.4   Mean   :1163   Mean   : 347  
 3rd Qu.:   0.00   3rd Qu.: 808.0   3rd Qu.:1298.2   3rd Qu.:1391   3rd Qu.: 728  
 Max.   :1474.00   Max.   :2336.0   Max.   :6110.0   Max.   :4692   Max.   :2065  
  LowQualFinSF       GrLivArea     BsmtFullBath     BsmtHalfBath        FullBath    
 Min.   :  0.000   Min.   : 334   Min.   :0.0000   Min.   :0.00000   Min.   :0.000  
 1st Qu.:  0.000   1st Qu.:1130   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:1.000  
 Median :  0.000   Median :1464   Median :0.0000   Median :0.00000   Median :2.000  
 Mean   :  5.845   Mean   :1515   Mean   :0.4253   Mean   :0.05753   Mean   :1.565  
 3rd Qu.:  0.000   3rd Qu.:1777   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:2.000  
 Max.   :572.000   Max.   :5642   Max.   :3.0000   Max.   :2.00000   Max.   :3.000  
    HalfBath       BedroomAbvGr    KitchenAbvGr    TotRmsAbvGrd      Fireplaces   
 Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   : 2.000   Min.   :0.000  
 1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.: 5.000   1st Qu.:0.000  
 Median :0.0000   Median :3.000   Median :1.000   Median : 6.000   Median :1.000  
 Mean   :0.3829   Mean   :2.866   Mean   :1.047   Mean   : 6.518   Mean   :0.613  
 3rd Qu.:1.0000   3rd Qu.:3.000   3rd Qu.:1.000   3rd Qu.: 7.000   3rd Qu.:1.000  
 Max.   :2.0000   Max.   :8.000   Max.   :3.000   Max.   :14.000   Max.   :3.000  
  GarageYrBlt     GarageCars      GarageArea       WoodDeckSF      OpenPorchSF    
 Min.   :1900   Min.   :0.000   Min.   :   0.0   Min.   :  0.00   Min.   :  0.00  
 1st Qu.:1962   1st Qu.:1.000   1st Qu.: 334.5   1st Qu.:  0.00   1st Qu.:  0.00  
 Median :1979   Median :2.000   Median : 480.0   Median :  0.00   Median : 25.00  
 Mean   :1979   Mean   :1.767   Mean   : 473.0   Mean   : 94.24   Mean   : 46.66  
 3rd Qu.:2001   3rd Qu.:2.000   3rd Qu.: 576.0   3rd Qu.:168.00   3rd Qu.: 68.00  
 Max.   :2010   Max.   :4.000   Max.   :1418.0   Max.   :857.00   Max.   :547.00  
 EnclosedPorch      X3SsnPorch      ScreenPorch        PoolArea          MiscVal        
 Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.000   Min.   :    0.00  
 1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.000   1st Qu.:    0.00  
 Median :  0.00   Median :  0.00   Median :  0.00   Median :  0.000   Median :    0.00  
 Mean   : 21.95   Mean   :  3.41   Mean   : 15.06   Mean   :  2.759   Mean   :   43.49  
 3rd Qu.:  0.00   3rd Qu.:  0.00   3rd Qu.:  0.00   3rd Qu.:  0.000   3rd Qu.:    0.00  
 Max.   :552.00   Max.   :508.00   Max.   :480.00   Max.   :738.000   Max.   :15500.00  
     MoSold           YrSold       SalePrice     
 Min.   : 1.000   Min.   :2006   Min.   : 34900  
 1st Qu.: 5.000   1st Qu.:2007   1st Qu.:129975  
 Median : 6.000   Median :2008   Median :163000  
 Mean   : 6.322   Mean   :2008   Mean   :180921  
 3rd Qu.: 8.000   3rd Qu.:2009   3rd Qu.:214000  
 Max.   :12.000   Max.   :2010   Max.   :755000  
library(e1071)

par(mfrow=c(3,2)) 

# Preço do Imóvel
plot(density(train_quanti$SalePrice), main="Density Plot: Sale Price", ylab="Frequency",
     sub=paste("Assimetria:", round(e1071::skewness(train_quanti$SalePrice), 2)))  
polygon(density(train_quanti$SalePrice), col="green")

# Número de quartos
plot(density(train_quanti$BedroomAbvGr), main="Density Plot: Bedroom", ylab="Frequency",
     sub=paste("Assimetria:", round(e1071::skewness(train_quanti$BedroomAbvGr), 2)))  
polygon(density(train_quanti$BedroomAbvGr), col="orange")

# Números de Banheiros
plot(density(train_quanti$FullBath), main="Density Plot: Bathroom", ylab="Frequency",
     sub=paste("Assimetria:", round(e1071::skewness(train_quanti$FullBath), 2)))  
polygon(density(train_quanti$FullBath), col="green")

# Área do terreno
plot(density(train_quanti$LotArea), main="Density Plot: Lot Area in feet", ylab="Frequency",
     sub=paste("Assimetria:", round(e1071::skewness(train_quanti$LotArea), 2)))  
polygon(density(train_quanti$LotArea), col="orange")

# Ano de Construção
plot(density(train_quanti$YearBuilt), main="Density Plot: Year Built", ylab="Frequency",
     sub=paste("Assimetria:", round(e1071::skewness(train_quanti$YearBuilt), 2)))  
polygon(density(train_quanti$YearBuilt), col="green")


# Área da garagem
plot(density(train_quanti$GarageArea), main="Density Plot: Garage Area", ylab="Frequency",
     sub=paste("Assimetria:", round(e1071::skewness(train_quanti$GarageArea), 2)))  
polygon(density(train_quanti$GarageArea), col="orange")

NA
NA

Vamos analisar os preços dos imóveis de acordo com o número de quartos, números de banheiros e vagas de garagem. Para isso vamos utilizar o boxplot e ver a variação dos valores e se ocorrem outliers para esses pares, a biblioteca ggplot auxiliará nessa etapa.

library(plotly)

lista=c("FullBath","BedroomAbvGr","GarageCars")
legenda=c("Valor do imovel pela numero de banheiros",
          "Valor do imovel pelo numero de quartos",
          "Valor do imovel pelo numero de vagas")

plot.boxes<-function(X,legenda){
  ggplotly(
    ggplot(train_quanti,aes_string(x=X, y="SalePrice",group=X, fill = X))+
      geom_boxplot() + labs(title = legenda)+
    theme_classic()
  )
  
}

Map(plot.boxes,lista,legenda)
$FullBath

$BedroomAbvGr

$GarageCars
NA

De fato essas variáveis são bem interessante para analisarmos como elas se comportam com a variavel Y SalePrice. No entanto o mais importante agora é investigarmos as correlações entre as variáveis. Para isso vamos utilizar a correlação de Pearson que encontramos no pacote ggcorplot.

library(ggcorrplot)


corr <- round(cor(train_quanti), 1)

# Plot

ggplotly(
  ggcorrplot(corr,
           type = "lower",
           lab = F, 
           lab_size = 5,  
           colors = c("tomato2", "white", "springgreen3"),
           title="Correlogram of House Prices Kaggle Dataset", 
           ggtheme=theme_bw)
)
NA

Assim foi possivel observar quem possui maior correlação positiva ou negativa com a variável Sale Price. Vamos agora tentar roda uma regressão linear que possa se capaz de explicar o comportamento do preço do imóvel em relação as outras variáveis. Para isso vamos criar uma função

LS0tDQp0aXRsZTogIkhvdXNlIHByaWNlcyAtIEthZ2dsZSBjaGFsbGVuZ2UiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIDEuT2JqZXRpdm8gZG8gZGVzYWZpbw0KDQogIE8gb2JqZXRpdm8gZG8gZGVzYWZpbyBIb3VzZSBwcmljZXMgw6kgcHJldmVyIG8gdmFsb3IgZGUgdmVuZGEgZGUgdW0gaW3Ds3ZlbCBkZSBhY29yZG8gY29tIGFzIDc5IHZhcmnDoXZlaXMgZG8gYmFuY28gZGUgZGFkb3MuIE8gZGVzYWZpbyBkZXNwb25pYmlsaXphIDQgYXJxdWl2b3MsIHVtIGRhdGFzZXQgZGUgdHJlaW5vIGUgdW0gZGF0YXNldCBkZSB0ZXN0ZSBhbWJvcyBlbSBjc3YsIHVtIGFycXVpdm8gdHh0IGNvbSBhIGRlc2NyacOnw6NvIGRhcyBjb2x1bmFzIGRvIGRhdGFzZXQgdHJlaW5vIGUgdW0gYXJxdWl2byBjc3YgZXhlbXBsbyBkZSBjb21vIG8gcmVzdWx0YWRvIGRldmUgc2VyIHN1Ym1ldGlkby4NCiAgUGFyYSBhY2Vzc2FyIG1haXMgaW5mb3JtYcOnw7VlcyBzb2JyZSBvIGRlc2FmaW8gZSBmYXplciBkb3dubG9hZCBkb3MgZGFkb3MgYmFzdGEgYWNlc3NhciBvIFtsaW5rXShodHRwczovL3d3dy5rYWdnbGUuY29tL2NvbXBldGl0aW9ucy9ob3VzZS1wcmljZXMtYWR2YW5jZWQtcmVncmVzc2lvbi10ZWNobmlxdWVzL292ZXJ2aWV3KS4NCiAgDQojIDIuIExlbmRvIG9zIGFycXVpdm9zLg0KDQpgYGB7cn0NCiMgQ2FycmVnYW5kbyBvIHRpZHl2ZXJzZQ0KDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCg0KIyBTZXRhbmRvIG8gZGlyZXTDs3JpbyBkZSB0cmFiYWxobw0Kc2V0d2QoZGlyID0gIkM6L1VzZXJzL3JvZG9sZm8ucGF1bGEvRGVza3RvcC9QRVNTT0FMX1JPRE9MRk8vQmV0dGVyIERlY2lzaW9ucy9zY3JpcHRzL2thZ2dsZS9ob3VzZV9wcmljZXMiKQ0KDQojIExlbmRvIG9zIGFycXVpdm9zIGRlIHRyZWlubyBlIGRlIHRlc3RlDQp0cmFpbiA9IGRhdGEuZnJhbWUocmVhZC5jc3YoInRyYWluLmNzdiIpKQ0KdGVzdCA9IGRhdGEuZnJhbWUocmVhZC5jc3YoInRlc3QuY3N2IikpDQoNCiMgVmlzdWFsaXphbmRvIGFzIGRpbWVuc8O1ZXMgZG9zIGFycXVpdm9zDQpkaW0odHJhaW4pDQpkaW0odGVzdCkNCg0KDQojIFZpc3VhbGl6YW5kbyBvIGRhdGFzZXQgZGUgdHJlaW5vIA0KZ2xpbXBzZSh0cmFpbikNCg0KIyBPYnRlbmRvIHVtIHN1bcOhcmlvIGRvcyBkYWRvcyBkZSB0cmVpbm8NCg0Kc3VtbWFyeSh0cmFpbikNCg0KYGBgDQpVdGlsaXphbmRvIG8gc3Vtw6FyaW8gZmljYSBjbGFybyBxdWUgbyBiYW5jbyBkZSBkYWRvcyBkZSB0cmVpbm8gcG9zc3VpIG11aXRhcyB2YXJpw6F2ZWlzLiBWYW1vcyBpbmNpYWxtZW50ZSBzZXBhcmFyIG8gYmFuY28gZGUgZGFkb3MgZW0gdmFyacOhdmVpcyBxdWFsaXRhdGl2YXMgZSB2YXJpw6F2ZWlzIHF1YW50aXRhdGl2YXMsIGFzc2ltIHNlIHRvcm5hIG1haXMgZmFjaWwgbyB0cmF0YW1lbnRvIGUgbGltcGV6YSBkb3MgZGFkb3MuDQoNClBhcmEgYXMgdmFyacOhdmVpcyBxdWFsaXRhdGl2YXMgcXVlIGFwcmVzZW50ZW0gdmFsb3JlcyBmYWx0YW50ZXMgb3UgTkFgcyB2YW1vcyBzdWJzdGl0dWlyIHBlbGEgbW9kYSBkYXF1ZWxhIHZhcmnDoXZlbCBlIHBhcmEgYXMgdmFyacOhdmVpcyBxdWFudGl0YXRpdmFzIGRlIHF1ZSBhcHJlc2VudGVtIHZhbG9yZXMgZmFsdGFudGVzIHZhbW9zIHN1YnN0aXR1aXIgcGVsYSBtw6lkaWEuIA0KDQpBIGNvbHVuYSBJZCBzZXLDoSBlbGltaW5hZGEgcG9yIHNlciB1bSB2YWxvciBzZXF1ZW5jaWFsIHF1ZSBuYW8gcmVwcmVzZW50YSBuZWh1bWEgaW5mb3JtYcOnw6NvIMO6dGlsIGNvbW8gdmFyacOhdmVsIGV4cGxpY2F0aXZhIGNhcGF6IGRlIGluZmx1ZW5jaWFyIG5vIHZhbG9yIHByZWRpdG8gZG8gcHJlw6dvIGRlIHZlbmRhIGRlIHVtYSBjYXNhLg0KDQpQYXJhIHNlcGFyYXIgbyBkYXRhc2V0IGVtIGRhZG9zIHF1YW50aXRhdGl2b3MgZSBxdWFsaXRhdGl2b3MgbyBtZWxob3IgY2FtaW5obyDDqSB1c2FyYSBhIGZ1bsOnw6NvIFtzZWxlY3RfaWYoKV0oaHR0cHM6Ly93d3cucmRvY3VtZW50YXRpb24ub3JnL3BhY2thZ2VzL2RwbHlyL3ZlcnNpb25zLzAuNS4wL3RvcGljcy9zZWxlY3RfaWYpIGRvIHBhY290ZSBkcGx5cg0KDQoNCmBgYHtyfQ0KDQojIFNlcGFyYW5kbyBvIGRhdGFmcmFtZSBlbSB0cmFpbiBxdWFudGkgZSB0cmFpbiBxdWFsaSB1c2FuZG8gYSBmdW7Dp8OjbyBzZWxlY3RfaWYNCg0KdHJhaW5fcXVhbnRpIDwtIHNlbGVjdF9pZih0cmFpbiwgaXMubnVtZXJpYykNCnRyYWluX3F1YWxpIDwtIHNlbGVjdF9pZih0cmFpbiwgaXMuY2hhcmFjdGVyKQ0KDQoNCmBgYA0KIyAzLiBMaW1wZXNhIGRlIGRhZG9zIFF1YWxpdGF0aXZvcw0KDQpgYGB7cn0NCiNWaXN1YWxpemFuZG8gb3MgZGFkb3MgDQpnbGltcHNlKHRyYWluX3F1YWxpKQ0KDQojIFByb2N1cmFuZG8gVmFsb3JlcyBtaXNzaW5nDQpjb2xTdW1zKGlzLm5hKHRyYWluX3F1YWxpKSkNCg0KYGBgDQoNCg0KQ29tIGVzc2EgYW5hbGlzZSBhIGNpbWEgc2FiZW1vcyBvIHNlZ3VpbnRlIHNvYnJlIG9zIG5vc3NvcyBkYWRvcyBjYXRlZ8Ozcmljb3M6DQoNCi0gVGVtb3MgMTQ2MCBvYnNlcnZhw6fDtWVzIGUgNDMgdmFyacOhdmVpcw0KLSBEYXMgNDMgdmFyacOhdmVpcyAxNSBhcHJlc2VudGFtIHZhbG9yZXMgZmFsdGFudGVzIChOQSkNCi0gRGFzIDE1IHZhcmnDoXZlaXMgY29tIHZhbG9yZXMgbnVsb3MgcG9kZW1vcyBjb25jbHVpciBxdWU6DQogIC0tQWxsZXkgICwgUG9vbFFDLCBGZW5jZSBlIE1pc0ZlYXR1cmUgc8OjbyBtYWlzIGRlIDkwJSBkZSB2YWxvcmVzIGZhbHRhbnRlcywgc2VuZG8gbWFpcyBjb2VyZW50ZSBhIGV4Y2x1c8OjbyBkZXN0YXMgZG8gbm9zc28gYmFuY28gZGUgZGFkby4NCi0gQXMgdmFyacOhdmVpcyByZXN0YW50ZXMgcXVlIGFwcmVzZW50YW0gdmFsb3JlcyBudWxvcyBzZXLDo28gdHJhdGFkYXMgZSBvIHZhbG9yIGRhIG1vZGEgc2Vyw6EgdXNhZG8gcGFyYSBzdWJzdGl0dWlyIG9zIHZhbG9yZXMgTkFgcy4gDQoNCmBgYHtyfQ0KDQojIERlbGV0YW5kbyBhcyBjb2x1bmFzIHF1ZSBuw6NvIHPDo28gZGUgaW50ZXJlc3NlIHBhcmEgbyBkYXRhc2V0LiANCg0KdHJhaW5fcXVhbGkgPC10cmFpbl9xdWFsaSAlPiUgc2VsZWN0KC1jKEFsbGV5LCBQb29sUUMsIEZlbmNlLCBNaXNjRmVhdHVyZSkpDQpwcmludChkaW0odHJhaW5fcXVhbGkpKQ0KDQoNCg0KIyBMaXN0YSBjb20gYXMgdmFyacOhdmVpcyA6IE1hc1ZuclR5cGUsQnNtdFF1YWwsQnNtdENvbmQsQnNtdEV4cG9zdXJlLEJzbXRGaW5UeXBlMSxCc210RmluVHlwZTIsRWxlY3RyaWNhbCwNCiMgRmlyZXBsYWNlUXUsR2FyYWdlVHlwZSxHYXJhZ2VGaW5pc2gsR2FyYWdlUXVhbCxHYXJhZ2VDb25kDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIw0KIyBDb250YW5kbyBvcyB2YWxvcmVzIE5BYHMgZGFzIHZhcmnDoXZlaXMgY29tIG1pc3NpbmcgdmFsdWVzDQp1bmlxdWUodHJhaW5fcXVhbGkkTWFzVm5yVHlwZSkNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkTWFzVm5yVHlwZSkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgTWFzVm5yVHlwZS4NCnRyYWluX3F1YWxpIDwtIHRyYWluX3F1YWxpICU+JSBtdXRhdGUoTWFzVm5yVHlwZT1jb2FsZXNjZShNYXNWbnJUeXBlLCJOb25lIikpDQoNCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEJzbXRRdWFsKQ0KDQpzb3J0KHRhYmxlKHRyYWluX3F1YWxpJEJzbXRRdWFsKSkNCg0KIyBBbHRlcmFuZG8gb3MgdmFsb3JlcyBkZSBOQSBwYXJhIG8gdmFsb3IgZGEgTW9kYSBkZSBCc210UXVhbC4NCnRyYWluX3F1YWxpIDwtIHRyYWluX3F1YWxpICU+JSAgICANCiAgbXV0YXRlKEJzbXRRdWFsPWNvYWxlc2NlKEJzbXRRdWFsLCJUQSIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEJzbXRDb25kKQ0KDQpzb3J0KHRhYmxlKHRyYWluX3F1YWxpJEJzbXRDb25kKSkNCg0KIyBBbHRlcmFuZG8gb3MgdmFsb3JlcyBkZSBOQSBwYXJhIG8gdmFsb3IgZGEgTW9kYSBkZSBCc210Q29uZA0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdENvbmQ9Y29hbGVzY2UoQnNtdENvbmQsIlRBIikpDQoNCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMNCg0KIyBDb250YW5kbyBvcyB2YWxvcmVzIE5BYHMgZGFzIHZhcmnDoXZlaXMgY29tIG1pc3NpbmcgdmFsdWVzDQp1bmlxdWUodHJhaW5fcXVhbGkkQnNtdEV4cG9zdXJlKQ0KDQpzb3J0KHRhYmxlKHRyYWluX3F1YWxpJEJzbXRFeHBvc3VyZSkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgQnNtdEV4cG9zdXJlDQp0cmFpbl9xdWFsaSA8LSB0cmFpbl9xdWFsaSAlPiUgICAgDQogIG11dGF0ZShCc210RXhwb3N1cmU9Y29hbGVzY2UoQnNtdEV4cG9zdXJlLCJObyIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEJzbXRGaW5UeXBlMSkNCg0Kc29ydCh0YWJsZSh0cmFpbl9xdWFsaSRCc210RmluVHlwZTEpKQ0KDQojIEFsdGVyYW5kbyBvcyB2YWxvcmVzIGRlIE5BIHBhcmEgbyB2YWxvciBkYSBNb2RhIGRlIEJzbXRGaW5UeXBlMQ0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdEZpblR5cGUxPWNvYWxlc2NlKEJzbXRGaW5UeXBlMSwiVW5mIikpDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEJzbXRGaW5UeXBlMikNCg0Kc29ydCh0YWJsZSh0cmFpbl9xdWFsaSRCc210RmluVHlwZTIpKQ0KDQojIEFsdGVyYW5kbyBvcyB2YWxvcmVzIGRlIE5BIHBhcmEgbyB2YWxvciBkYSBNb2RhIGRlIEJzbXRGaW5UeXBlMg0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdEZpblR5cGUyPWNvYWxlc2NlKEJzbXRGaW5UeXBlMiwiVW5mIikpDQoNCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMNCg0KIyBDb250YW5kbyBvcyB2YWxvcmVzIE5BYHMgZGFzIHZhcmnDoXZlaXMgY29tIG1pc3NpbmcgdmFsdWVzDQp1bmlxdWUodHJhaW5fcXVhbGkkRWxlY3RyaWNhbCkNCg0Kc29ydCh0YWJsZSh0cmFpbl9xdWFsaSRFbGVjdHJpY2FsKSkNCg0KIyBBbHRlcmFuZG8gb3MgdmFsb3JlcyBkZSBOQSBwYXJhIG8gdmFsb3IgZGEgTW9kYSBkZSBFbGVjdHJpY2FsDQp0cmFpbl9xdWFsaSA8LSB0cmFpbl9xdWFsaSAlPiUgICAgDQogIG11dGF0ZShFbGVjdHJpY2FsPWNvYWxlc2NlKEVsZWN0cmljYWwsIlNCcmtyIikpDQoNCg0KIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIw0KDQojIENvbnRhbmRvIG9zIHZhbG9yZXMgTkFgcyBkYXMgdmFyacOhdmVpcyBjb20gbWlzc2luZyB2YWx1ZXMNCnVuaXF1ZSh0cmFpbl9xdWFsaSRGaXJlcGxhY2VRdSkNCg0Kc29ydCh0YWJsZSh0cmFpbl9xdWFsaSRGaXJlcGxhY2VRdSkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgRmlyZXBsYWNlUXUNCnRyYWluX3F1YWxpIDwtIHRyYWluX3F1YWxpICU+JSAgICANCiAgbXV0YXRlKEZpcmVwbGFjZVF1PWNvYWxlc2NlKEZpcmVwbGFjZVF1LCJHZCIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEdhcmFnZVR5cGUpDQoNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkR2FyYWdlVHlwZSkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgR2FyYWdlVHlwZQ0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlVHlwZT1jb2FsZXNjZShHYXJhZ2VUeXBlLCJBdHRjaGQiKSkNCg0KIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIw0KDQojIENvbnRhbmRvIG9zIHZhbG9yZXMgTkFgcyBkYXMgdmFyacOhdmVpcyBjb20gbWlzc2luZyB2YWx1ZXMNCnVuaXF1ZSh0cmFpbl9xdWFsaSRHYXJhZ2VGaW5pc2gpDQoNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkR2FyYWdlRmluaXNoKSkNCg0KIyBBbHRlcmFuZG8gb3MgdmFsb3JlcyBkZSBOQSBwYXJhIG8gdmFsb3IgZGEgTW9kYSBkZSBHYXJhZ2VGaW5pc2gNCnRyYWluX3F1YWxpIDwtIHRyYWluX3F1YWxpICU+JSAgICANCiAgbXV0YXRlKEdhcmFnZUZpbmlzaD1jb2FsZXNjZShHYXJhZ2VGaW5pc2gsIlVuZiIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEdhcmFnZVF1YWwpDQoNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkR2FyYWdlUXVhbCkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgR2FyYWdlUXVhbA0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlUXVhbD1jb2FsZXNjZShHYXJhZ2VRdWFsLCJUQSIpKQ0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjDQoNCiMgQ29udGFuZG8gb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzIGNvbSBtaXNzaW5nIHZhbHVlcw0KdW5pcXVlKHRyYWluX3F1YWxpJEdhcmFnZUNvbmQpDQoNCnNvcnQodGFibGUodHJhaW5fcXVhbGkkR2FyYWdlQ29uZCkpDQoNCiMgQWx0ZXJhbmRvIG9zIHZhbG9yZXMgZGUgTkEgcGFyYSBvIHZhbG9yIGRhIE1vZGEgZGUgR2FyYWdlQ29uZA0KdHJhaW5fcXVhbGkgPC0gdHJhaW5fcXVhbGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlQ29uZD1jb2FsZXNjZShHYXJhZ2VDb25kLCJUQSIpKQ0KDQoNCmBgYA0KTm8gZW50YW50bywgYWx0ZXJhciBuYSBtw6NvIGNhZGEgdmFyacOhdmVsIG7Do28gw6kgYSBzb2x1w6fDo28gbWFpcyBpbnRlbGlnZW50ZSwgaW1hZ2luZSB1bSBjYXNvIGRlIHVtIGRhdGFzZXQgY29tIGNlbnRlbmFzIG91IGF0w6kgbWlsaGFyZXMgZGUgdmFyacOhdmVpcyBwYXJhIHNlcmVtIGFsdGVyYWRhcy4NCg0KTyBtZWxob3IgYSBzZXIgZmVpdG8gw6kgbG9vcCBkbyB0aXBvIGZvciwgY2FwYXogZGUgdmFycmVyIHRvZGFzIGFzIHZhcmnDoXZlaXMsIG9idGVyIG8gdmFsb3IgZGUgZnJlcXVlbmNpYSBwYXJhIGNhZGEgdmFsb3IgZSBhcm1hemVuYXIgZXN0ZSBjb20gYSBtb2RhIGUgZW0gc2VndWlkYSBzdWJzdGl0dWlyIG9zIG1pc3NpbmcgdmFsdWVzIGRlIGNhZGEgdmFyacOhdmVsLiANCg0KVmFtb3MgZW50w6NvIGNhcnJlZ2FyIG8gYmFuY28gZGUgZGFkb3Mgbm92YW1lbnRlIHBhcmEgYW51bGFyIGFzIHRyYW5zZm9ybWHDp8O1ZXMgcmVhbGl6YWRhcyBlIHNlcGFyYXIgZW0gZGFkb3MgcXVhbnRpdGF0aXZvcyBlIHF1YWxpdGF0aXZvcyBlIGVtIHNlZ3VpZGEgc29tYXIgb3MgdmFsb3JlcyBOQWBzIGRhcyB2YXJpw6F2ZWlzLiANCg0KYGBge3J9DQoNCiMgU29tYW5kbyBvcyB2YWxvcmVzIE5BYHMgcGFyYSBjYWRhIGNvbHVuYQ0KY29sU3Vtcyhpcy5uYSh0cmFpbl9xdWFsaSkpDQoNCiMgTW9udGFuZG8gdW0gbG9vcCBkbyB0aXBvIGZvciANCmZvcihpdGVtIGluIGNvbG5hbWVzKHRyYWluX3F1YWxpKSl7DQogIA0KICB4ID0gZGF0YS5mcmFtZShzb3J0KHRhYmxlKHRyYWluX3F1YWxpW2l0ZW1dKSkpDQogICNwcmludCh4KQ0KICBtb2RlID0gdGFpbCh4LCBuID0gMSkNCiAgI3ByaW50KG1vZGUpDQogIG1vZGEgPSAobW9kZVsxLDFdKQ0KICBtb2RhIDwtIChhcy5jaGFyYWN0ZXIobW9kYSkpDQogICNwcmludChtb2RhKQ0KICB0cmFpbl9xdWFsaSA8LSB0cmFpbl9xdWFsaSAlPiUgDQogIG11dGF0ZShhY3Jvc3MoaXRlbSwgfiBjYXNlX3doZW4oaXMubmEoLikgfiBtb2RhLCBUUlVFIH4gLikpKQ0KICANCiAgDQogIA0KfQ0KDQojIHRlc3RhbmRvIG8gcmVzdWx0YWRvDQp1bmlxdWUodHJhaW5fcXVhbGkkU3RyZWV0KQ0KdGFibGUodHJhaW5fcXVhbGkkU3RyZWV0KQ0KIyBTb21hbmRvIG9zIHZhbG9yZXMgTkFgcyBwYXJhIGNhZGEgY29sdW5hIHBhcmEgY29uZmVyaXIgb3MgcmVzdWx0YWRvcyANCmNvbFN1bXMoaXMubmEodHJhaW5fcXVhbGkpKQ0KYGBgDQoNCiMgNC4gTGltcGV6YSBkZSBkYWRvcyBxdWFudGl0YXRpdm9zDQoNCkRhIG1lc21hIG1hbmVpcmEgcXVlIHJlYWxpemFtb3MgYSBsaW1wZXphIGRvcyBkYWRvcyBmYWx0YW50ZXMgbm8gYmFuY28gZGUgZGFkb3MgcXVhbGl0YXRpdm9zIGluaWNpYW1vcyBhcXVpIGEgbGltcGV6YSBkZSBkYWRvcyBmYWx0YW50ZXMgbm9zIGRhZG9zIHF1YW50aXRhdGl2b3MNCg0KYGBge3J9DQojIFZpc3VhbGl6YW5kbyBvcyBkYWRvcyANCmdsaW1wc2UodHJhaW5fcXVhbnRpKQ0KDQojIFNvbWFuZG8gb3MgdmFsb3JlcyBmYWx0YW50ZXMNCmNvbFN1bXMoaXMubmEodHJhaW5fcXVhbnRpKSkNCg0KDQpgYGANCiAgDQpDb20gbyByZXN1bHRhZG8gZG8gc29tYXTDs3JpbyBkb3MgdmFsb3JlcyBhdXNlbnRlcyBwb3IgdmFyacOhdmVsIGRvIGJhbmNvIGRlIGRhZG9zIHF1YW50aXRhdGl2b3MsIG9ic2VydmFtb3MgcXVlIGFwZW5hcyAzIHZhcmnDoXZlaXMgcG9zc3VlbSBtaXNzaW5nIHZhbHVlcy4gU2VuZG8gYXNzaW0gYSBtZWxob3Igb3DDp8OjbyDDqSB1dGlsaXphciBhIGZ1bsOnw6NvIG11dGF0ZSgpIGUgc3Vic3RpdHVpciBvcyBOQWBzIHBlbGEgbcOpZGlhIGVudHJlIGFzIG9ic2VydmHDp8O1ZXMgcGFyYSBjYWRhIHZhcmnDoXZlbC4gDQoNCmBgYHtyfQ0KIyBMb3RGcm9udGFnZQ0KdHJhaW5fcXVhbnRpIDwtIHRyYWluX3F1YW50aSAlPiUgICAgDQogIG11dGF0ZShMb3RGcm9udGFnZT1jb2FsZXNjZShMb3RGcm9udGFnZSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lYW4oeCA9dHJhaW5fcXVhbnRpJExvdEZyb250YWdlLCBuYS5ybSA9IFQgKSkpDQoNCiMgTWFzVm5yQXJlYQ0KdHJhaW5fcXVhbnRpIDwtIHRyYWluX3F1YW50aSAlPiUgICAgDQogIG11dGF0ZShNYXNWbnJBcmVhPWNvYWxlc2NlKE1hc1ZuckFyZWEsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRyYWluX3F1YW50aSRNYXNWbnJBcmVhLCBuYS5ybSA9IFQgKSkpDQoNCiMgR2FyYWdlWXJCbHQNCnRyYWluX3F1YW50aSA8LSB0cmFpbl9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlWXJCbHQ9Y29hbGVzY2UoR2FyYWdlWXJCbHQsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRyYWluX3F1YW50aSRHYXJhZ2VZckJsdCwgbmEucm0gPSBUICkpKQ0KDQojIENvbmZlcmluZG8gc2UgYWluZGEgZXhpc3RlIHZhbG9yZXMgZmFsdGFudGVzOg0KY29sU3Vtcyhpcy5uYSh0cmFpbl9xdWFudGkpKQ0KDQpgYGANCiMgNS4gVW5pbmRvIG9zIGRhdGFmcmFtZXMgcXVhbGkgZSBxdWFudGkgZGUgdHJlaW5vIA0KDQpBcMOzcyBhIGxpbXBlemEgZG9zIGRhZG9zIGRlIGZvcm1hIGFkZXF1YWRhIGRlIGFjb3JkbyBjb20gbyB0aXBvIGRlIGRhZG8sIG9uZGUgcGFyYSBvcyBkYWRvcyBjYXRlZ8Ozcmljb3MgbsOzcyB1dGlsaXphbW9zIG8gdmFsb3IgZGEgbW9kYSBwYXJhIHN1YnN0aXR1aXIgb3MgbWlzc2luZyB2YWx1ZXMgZSBvIHZhbG9yIGRhIG1lZGlhIHBhcmEgc3Vic3RpdHVpciBvcyBkYWRvcyBhdXNlbnRlIG5vcyBkYWRvcyBxdWFudGl0YXRpdm9zLCBwb2RlbW9zIGNvbSBzZWd1cmFuw6dhIHVuaXIgZXNzZSBiYW5jb3MgZGUgZGFkb3MgZW0gYXBlbmFzIHVtIGJhbmNvIGRlIGRhZG8gasOhIHRyYXRhZG8gZSBwcm9udG8gcGFyYSBhIGV0YXBhIGRlIGFuw6FsaXNlIGRlIGRhZG9zLiANCg0KYGBge3J9DQoNCiMgVW5pbmRvIG9zIGJhbmNvcyBkZSBkYWRvcw0KDQp0cmFpbi5jbGVhbiA8LSBjYmluZCh0cmFpbl9xdWFsaSx0cmFpbl9xdWFudGkpDQpuYW1lcyh0cmFpbi5jbGVhbikNCmBgYA0KDQojIDYuIExpbXBhbmRvIG9zIGRhZG9zIGRlIHRlc3RlLg0KDQpEYSBtZXNtYSBmb3JtYSBxdWUgcHJvY2VkZW1vcyBjb20gb3MgZGFkb3MgZGUgdHJlaW5vIGlyZW1vcyBhYm9yZGFyIGFnb3JhIG9zIGRhZG9zIGRlIHRlc3RlIGUgZGVpeGFyIHRvZG9zIG9zIG5vc3NvcyBiYW5jb3MgZGUgZGFkb3MgcHJvbnRvcyBwYXJhIGEgZXRhcGEgZGUgYW7DoWxpc2UgZXhwbG9yYXTDs3JpYSBkZSBkYWRvcy4gDQpBIHByaW1laXJhIGV0YXBhIHNlcsOhIGEgc2VwYXJhw6fDo28gZW0gZGFkb3MgcXVhbGl0YXRpdm9zIGUgZGFkb3MgcXVhbnRpdGF0aXZvcywgc2VndWlkbyBkYSBsaW1wZXphICBkb3MgdmFsb3JlcyBhdXNlbnRlcyBlIHBvciBmaW0gYSB1bmnDo28gZG9zIGJhbmNvcyBxdWFsaSBlIHF1YW50aSBkZSB0ZXN0ZSBlbSBhcGVuYXMgdW0gZGF0YWZyYW1lIHRyYXRhZG8uIA0KDQpgYGB7cn0NCiMgVmlzdWFsaXphbmRvIG9zIGRhZG9zIGRlIHRlc3RlDQpnbGltcHNlKHRlc3QpDQoNCiMgc2VwYXJhbmRvIG9zIGRhZG9zIGRlIHRlc3RlIGVtIHF1YW50aSBlIHF1YWxpDQoNCnRlc3RfcXVhbnRpIDwtIHNlbGVjdF9pZih0ZXN0LCBpcy5udW1lcmljKQ0KDQp0ZXN0X3F1YWxpIDwtIHNlbGVjdF9pZih0ZXN0LCBpcy5jaGFyYWN0ZXIpDQoNCiMgVmlzdWFsaXphbmRvIG8gcmVzdWx0YWRvDQoNCmdsaW1wc2UodGVzdF9xdWFsaSkNCmdsaW1wc2UodGVzdF9xdWFudGkpDQoNCiMgVHJhdGFuZG8gb3MgZGFkb3MgcXVhbGl0YXRpdm9zDQoNCmNvbFN1bXMoaXMubmEodGVzdF9xdWFsaSkpDQoNCg0KIyBPYnNlcnZlIHF1ZSB0ZW1vcyBvcyBtZXNtb3MgcHJvYmxlbWFzLCBleGlzdGVtIHZhcmnDoXZlbCBxdWUgc8OjbyBwcmF0aWNhbWVudGUgbnVsYXMsIHNlbmRvDQojIGEgbWVsaG9yIGVzY29saGEgZGVsZXRhciBlbGFzIGRvIG5vc3NvIGJhbmNvIGRlIGRhZG9zLiANCg0KdGVzdF9xdWFsaSA8LSB0ZXN0X3F1YWxpICU+JSBzZWxlY3QoLWMoQWxsZXksIFBvb2xRQywgRmVuY2UsIE1pc2NGZWF0dXJlKSkNCmdsaW1wc2UodGVzdF9xdWFsaSkNCg0KDQojIEFwbGljYW5kbyBvIG1lc21vIGxvb3AgZm9yIHBhcmEgdHJhdGFyIG9zIGRhZG8gZGUgdGVzdGUgcXVhbGl0YXRpdm9zLg0KDQpmb3IoaXRlbSBpbiBjb2xuYW1lcyh0ZXN0X3F1YWxpKSl7DQogICNwcmludChpdGVtKSANCiAgeCA9IGRhdGEuZnJhbWUoc29ydCh0YWJsZSh0ZXN0X3F1YWxpW2l0ZW1dKSkpDQogICNwcmludCh4KQ0KICBtb2RlID0gdGFpbCh4LCBuID0gMSkNCiAgI3ByaW50KG1vZGUpDQogIG1vZGEgPSAobW9kZVsxLDFdKQ0KICBtb2RhIDwtIGFzLmNoYXJhY3Rlcihtb2RhKQ0KICAjcHJpbnQodHJhaW5fcXVhbGlbaXRlbV0pDQogIHRlc3RfcXVhbGkgPC0gdGVzdF9xdWFsaSAlPiUgDQogIG11dGF0ZShhY3Jvc3MoaXRlbSwgfiBjYXNlX3doZW4oaXMubmEoLikgfiBtb2RhLCBUUlVFIH4gLikpKQ0KfQ0KDQojIENvbmZlcmluZG8gYSBzb21hIGRvcyB2YWxvcmVzIG51bG9zIG5hcyBjb2x1bmFzIGFww7NzIGEgdHJhbnNmb3Jtw6fDo28gDQp1bmlxdWUodGVzdF9xdWFsaSRNYXNWbnJUeXBlKQ0KdGFibGUodGVzdF9xdWFsaSRNYXNWbnJUeXBlKQ0KY29sU3Vtcyhpcy5uYSh0ZXN0X3F1YWxpKSkNCg0KDQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIw0KDQojIFRyYXRhbmRvIG9zIGRhZG9zIHF1YW50aXRhdGl2b3MgDQoNCiMgdmlzdWFpemFuZG8gb3MgZGFkb3MNCmdsaW1wc2UodGVzdF9xdWFudGkpDQoNCiMgQ29uZmVyaW5kbyBvcyB2YWxvcmVzIGF1c2VudGVzDQoNCmNvbFN1bXMoaXMubmEodGVzdF9xdWFudGkpKQ0KDQojIEFvIHRvZG8gdGVtb3MgOCB2YXJpYXZlaXMgcXVlIGFwcmVzZW50YW0gdmFsb3JlcyBhdXNlbnRlcy4NCnRlc3RfcXVhbnRpIDwtIHRlc3RfcXVhbnRpICU+JSAgICANCiAgbXV0YXRlKExvdEZyb250YWdlPWNvYWxlc2NlKExvdEZyb250YWdlLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWVhbih4ID10ZXN0X3F1YW50aSRMb3RGcm9udGFnZSwgbmEucm0gPSBUICkpKQ0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoTWFzVm5yQXJlYT1jb2FsZXNjZShNYXNWbnJBcmVhLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWVhbih4ID10ZXN0X3F1YW50aSRNYXNWbnJBcmVhLCBuYS5ybSA9IFQgKSkpDQp0ZXN0X3F1YW50aSA8LSB0ZXN0X3F1YW50aSAlPiUgICAgDQogIG11dGF0ZShCc210RmluU0YxID1jb2FsZXNjZShCc210RmluU0YxICwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lYW4oeCA9dGVzdF9xdWFudGkkQnNtdEZpblNGMSAsIG5hLnJtID0gVCApKSkNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdEZpblNGMiA9Y29hbGVzY2UoQnNtdEZpblNGMiAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJEJzbXRGaW5TRjIgLCBuYS5ybSA9IFQgKSkpDQoNCnRlc3RfcXVhbnRpIDwtIHRlc3RfcXVhbnRpICU+JSAgICANCiAgbXV0YXRlKEJzbXRVbmZTRiA9Y29hbGVzY2UoQnNtdFVuZlNGICwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lYW4oeCA9dGVzdF9xdWFudGkkQnNtdFVuZlNGICwgbmEucm0gPSBUICkpKQ0KDQoNCnRlc3RfcXVhbnRpIDwtIHRlc3RfcXVhbnRpICU+JSAgICANCiAgbXV0YXRlKFRvdGFsQnNtdFNGID1jb2FsZXNjZShUb3RhbEJzbXRTRiAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJFRvdGFsQnNtdFNGICwgbmEucm0gPSBUICkpKQ0KDQp0ZXN0X3F1YW50aSA8LSB0ZXN0X3F1YW50aSAlPiUgICAgDQogIG11dGF0ZShCc210RnVsbEJhdGggPWNvYWxlc2NlKEJzbXRGdWxsQmF0aCAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJEJzbXRGdWxsQmF0aCAsIG5hLnJtID0gVCApKSkNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoQnNtdEhhbGZCYXRoID1jb2FsZXNjZShCc210SGFsZkJhdGggLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWVhbih4ID10ZXN0X3F1YW50aSRCc210SGFsZkJhdGggLCBuYS5ybSA9IFQgKSkpDQoNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlWXJCbHQgPWNvYWxlc2NlKEdhcmFnZVlyQmx0ICwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lYW4oeCA9dGVzdF9xdWFudGkkR2FyYWdlWXJCbHQgLCBuYS5ybSA9IFQgKSkpDQoNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlQ2FycyA9Y29hbGVzY2UoR2FyYWdlQ2FycyAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJEdhcmFnZUNhcnMgLCBuYS5ybSA9IFQgKSkpDQoNCg0KdGVzdF9xdWFudGkgPC0gdGVzdF9xdWFudGkgJT4lICAgIA0KICBtdXRhdGUoR2FyYWdlQXJlYSA9Y29hbGVzY2UoR2FyYWdlQXJlYSAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZWFuKHggPXRlc3RfcXVhbnRpJEdhcmFnZUFyZWEgLCBuYS5ybSA9IFQgKSkpDQoNCmNvbFN1bXMoaXMubmEodGVzdF9xdWFudGkpKQ0KDQojIEp1bnRhbmRvIG9zIGRhdGFmcmFtZXMNCg0KdGVzdC5jbGVhbiA8LSBjYmluZCh0ZXN0X3F1YWxpLHRlc3RfcXVhbnRpKQ0KDQoNCg0KYGBgDQoNClBvciBmaW0sIGNhYmUgYWdvcmEgY29uZmVyaXIgb3MgZGFkb3MgZGUgdHJlaW5vIGUgdGVzdGUgdHJhdGFkb3MNCg0KYGBge3J9DQoNCmNvbFN1bXMoaXMubmEodHJhaW4uY2xlYW4pKQ0KY29sU3Vtcyhpcy5uYSh0ZXN0LmNsZWFuKSkNCg0KDQpgYGANCiMgNy4gQW7DoWxpc2UgRXhwbG9yYXTDs3JpYSBkZSBEYWRvcy4NCg0KTmVzdGEgZXRhcGEgaW5pY2lhcmVtb3MgdW1hIGFuYWxpc2UgZGVzY3JpdGl2YSB0YW50byBkYXMgdmFyacOhdmVpcyBxdWFsaXRhdGl2YXMgY29tbyBxdWFudGl0YXRpdmFzIGRvIGJhbmNvIGRlIGRhZG9zIGRlIHRyZWluby4gIFBhcmEgb3MgZGFkb3MgcXVhbnRpdGF0aXZvcyBhYm9yZGFyZW1vcyBlbSBwcmltZWlyYSBpbnN0YW5jaWEgdW1hIGludmVzdGlnYcOnw6NvIHBvciBlc3RhdGlzdGljYSBkZWNyaXRpdmEgdW5pdmFyaWFkYSBlIGRlcG9pcyBiaXZhcmlhZGEgcGFyYSBlbnRlbmRlciBhIHN1YSByZWxhw6fDo28gY29tIGEgdmFyaWF2ZWwgWSBkbyBwcm9ibGVtYSBxdWUgw6kgYSBTYWxlc1ByaWNlLiANCg0KIyMjIDcuMSBFc3RhdGlzdGljYSBEZXNjcml0aXZhIHBhcmEgZGFkb3MgUXVhbGl0YXRpdm9zLg0KQSBlc3RhdGlzdGljYSB1bml2YXJpYWRhIHBhcmEgIGNvbnRlbXBsYSB0YWJlbGFzIGRlIGZlcmVxdWVuY2lhIGRlIG9jb3JyZW5jaWEgbyBxdWUgasOhIHZpbW9zIGFudGVyaW9ybWVudGUgYW8gb2J0ZXJtb3MgYSBtb2RhLCByZXByZXNlbnRhw6fDo28gZ3LDoWZpY2EgZGEgZGlzdHJpYnVpw6fDo28gZSBtZWRpZGFzIGRlIGxvY2FsaXphw6fDo28sIGRpc3BlcnPDo28gb3UgdmFyaWFiaWxpZGFkZSBlIG1lZGlkYXMgZGUgZm9ybWEoYXNzaW1ldHJpYSBlIGN1cnRvc2lzKS4NCg0KQSB0YWJlbGEgZGUgZGlzdHJpYnVpw6fDo28gZGUgZnJlcXXDqm5jaWFzIMOpIGNhbGN1bGFkYSBwYXJhIGNhZGEgdmFsb3IgZGlzY3JldG8gZGEgdmFyacOhdmVsLiBBIGZyZXF1w6puY2lhIHBvZGUgc2VyIGFic29sdXRhIHF1ZSBpbmZvcm1hIG8gbsO6bWVybyBkZSBvY29ycsOqbmNpYXMgZGUgY2FkYSBlbGVtZW50byBpIG5hIGFtb3N0cmEsIHBvZGUgc2VyIHVtYSBmcmVxdcOqbmNpYSByZWxhdGl2YSBxdWUgZm9ybmVjZSBhIHBvcmNlbnRhZ2VtICUgcmVsYXRpdmEgw6AgZnJlcXXDqm5jaWEgYWJzb2x1dGEsIHBvZGUgc2VyIHVtYSBmcmVxdcOqbmNpYSBhY3VtdWxhZGEgcXVlIHJlcHJlc2VudGEgYSBzb21hIGRlIHRvZG9zIG9zIGVsZW1lbnRvcyBlIHBvciBmaW0gcG9kZSBzZXIgdW1hIGZyZXF1w6puY2lhIHJlbGF0aXZhIGFjdW11bGFkYSBxdWUgw6kgYSBmcmVxdWVuY2lhIHJlbGF0aXZhIMOgIGFjdW11bGFkYS4gDQoNCmBgYHtyfQ0KDQojIENhcnJlZ2FuZG8gb3MgcGFjb3Rlcy4gDQoNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkocGxvdGx5KQ0KDQoNCiMgQ2FsY3VsbyBkYXMgZnJlcXVlbmNpYXMgYWJzb2x1dGFzIA0KDQp0YWJlbGEuZnJlcS5NU1pvbmluZyA8LSBhcy5kYXRhLmZyYW1lKHNvcnQodGFibGUodHJhaW5fcXVhbGkkTVNab25pbmcpKSkNCmNvbHVuYXMgPC0gYyAoIk1TWm9uaW5nIiwgIkZyZXEiKQ0KbmFtZXModGFiZWxhLmZyZXEuTVNab25pbmcpIDwtIGNvbHVuYXMNCg0KIyBCYXJyYXMgdmVydGljaWFzIA0KZ2dwbG90bHkoDQpnZ3Bsb3QoZGF0YSA9IHRhYmVsYS5mcmVxLk1TWm9uaW5nLCBhZXMoeCA9IE1TWm9uaW5nLCB5ID0gRnJlcSkpKw0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgZmlsbCA9ICJibHVlIiwgY29sb3IgPSAiYmxhY2siKSsNCiAgICB0aGVtZV9jbGFzc2ljKGJhc2Vfc2l6ZSA9IDE4KSANCikNCg0KDQojIEJhcnJhcyBlbXBpbGhhZGFzDQpnZ3Bsb3RseSgNCnRhYmVsYS5mcmVxLk1TWm9uaW5nICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gIiIsIHkgPSBGcmVxLCBmaWxsID0gTVNab25pbmcsIGxhYmVsID0gRnJlcSkpKw0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5Iiwgd2lkdGggPSAwLjMpICsgY29vcmRfZmxpcCgpICsgDQogIGdlb21fbGFiZWwocG9zaXRpb24gPSBwb3NpdGlvbl9zdGFjayh2anVzdCA9IDAuNSkpKw0KICB0aGVtZV92b2lkKCkNCikNCg0KDQpgYGANCg0KRGVzdGEgbWFuZWlyYSBjcmlhbW9zIHVtYSBmb3JtYSBlZmljaWVudGUgZGUgZXhwbG9yYW1vcyBvIGJhbmNvIGRlIGRhZG9zIHBhcmEgYXMgdmFyacOhdmVpcyBjYXRlZ8OzcmljYXMuIA0KQ29tbyB0ZW1vcyAzOSB2YXJpw6F2ZWlzIGV4cGxvcmF0w7NyaWFzIG8gbWVsaG9yIGEgZmF6ZXIgw6kgY3JpYXIgdW0gZnVuw6fDo28gcXVlIG5vcyByZXRvcm5lIGVzc2FzIHRyw6pzIGluZm9ybWHDp8O1ZXMsIGEgdGFiZWxhIGRlIGZyZXF1w6puY2lhLCBvIGdyYWZpY28gZGUgYmFycmFzIHZlcnRpY2FpcyBlIGdyYWZpY28gZGUgYmFycmFzIGVtcGlsaGFkYXMuIA0KDQpFbnTDo28gdmFtb3MgY29sb2NhciBhIG3Do28gbmEgbWFzc2EhIQ0KDQoNCmBgYHtyfQ0KYW5hbGlzZV9leHBsb19jYXRlZ29yaWNhIDwtIGZ1bmN0aW9uKHNlcmllKXsNCiAgDQogICMgQ2FsY3VsbyBkYXMgZnJlcXVlbmNpYXMgYWJzb2x1dGFzIA0KDQogIHRhYmVsYS5mcmVxIDwtIGFzLmRhdGEuZnJhbWUoc29ydCh0YWJsZShzZXJpZSkpKQ0KICBjb2x1bmFzIDwtIGMgKCJWYXJpYXZlbCIsICJGcmVxIikNCiAgbmFtZXModGFiZWxhLmZyZXEpIDwtIGNvbHVuYXMNCiAgDQogICNHcmFmaWNvIGRlIGJhcnJhcyB2ZXJ0aWNhaXMNCiAgZzEgPC0gZ2dwbG90bHkoDQogIGdncGxvdChkYXRhID0gdGFiZWxhLmZyZXEsIGFlcyh4ID0gVmFyaWF2ZWwsIHkgPSBGcmVxKSkrDQogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBmaWxsID0gImJsdWUiLCBjb2xvciA9ICJibGFjayIpKw0KICB0aGVtZV9jbGFzc2ljKGJhc2Vfc2l6ZSA9IDE4KSANCikNCiAgDQogICMgQmFycmFzIGVtcGlsaGFkYXMNCiAgZzIgPC0gZ2dwbG90bHkoDQogIHRhYmVsYS5mcmVxICU+JSANCiAgZ2dwbG90KGFlcyh4ID0gIiIsIHkgPSBGcmVxLCBmaWxsID0gVmFyaWF2ZWwsIGxhYmVsID0gRnJlcSkpKw0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5Iiwgd2lkdGggPSAwLjMpICsgY29vcmRfZmxpcCgpICsgDQogIGdlb21fbGFiZWwocG9zaXRpb24gPSBwb3NpdGlvbl9zdGFjayh2anVzdCA9IDAuNSkpKw0KICB0aGVtZV92b2lkKCkNCikNCiAgcmV0b3Jub3MgPC0gbGlzdCh0YWJlbGEuZnJlcSwgZzEsIGcyKQ0KDQogIHJldHVybihyZXRvcm5vcykNCiAgDQp9DQoNCnRyYWluX3F1YWxpJEhvdXNlU3R5bGUNCnJlc3VsdGFkb19NU1pvbmluZyA8LSBhbmFsaXNlX2V4cGxvX2NhdGVnb3JpY2Eoc2VyaWUgPSAgdHJhaW5fcXVhbGkkTVNab25pbmcpDQoNCnJlc3VsdGFkb19NU1pvbmluZ1tbM11dDQoNCnJlc3VsdGFkb19TdHJlZXQgPC0gYW5hbGlzZV9leHBsb19jYXRlZ29yaWNhKHNlcmllID0gdHJhaW5fcXVhbGkkU3RyZWV0KQ0KDQpyZXN1bHRhZG9fSG91c2VTdHlsZSA8LSBhbmFsaXNlX2V4cGxvX2NhdGVnb3JpY2Eoc2VyaWU9dHJhaW5fcXVhbGkkSG91c2VTdHlsZSkNCnJlc3VsdGFkb19Ib3VzZVN0eWxlDQoNCmBgYA0KIyA4LiBBbsOhbGlzZSBleHBsb3JhdMOzcmlhIGRvcyBkYWRvcyBxdWFudGl0YXRpdm9zDQoNCg0KQSBhbsOhbGlzZSBleHBsb3JhdMOzcmlhIGRlIGRhZG9zIHF1YW50aXRhdGl2b3Mgb3MgcHJpbmNpcGFpcyBncsOhZmljb3MgcXVlIGlyw6NvIHRyYXplciBpbmZvcm1hw6fDtWVzIGltcG9ydGFudGVzIHPDo28gZGUgbGluaGEsIGRpc3BlcnPDo28sIGhpc3RvZ3JhbWEgZSBib3ggcGxvdC4gDQoNClZhbW9zIGVudMOjbyBhbmFsaXNhciBvIG5vc3NvIGJhbmNvIGRlIGRhZG9zIGRlIHRyZWlubyBxdWUgY29udGVtIGFzIHZhcmnDoXZlaXMgcXVhbnRpdGF0aXZhcywgZSBhbMOpbSBkZSBmYXplciBlc3NhcyB2aXN1YWxpemHDp8O1ZXMgdGFtYsOpbSBpcmVtb3MgaW5zZXJpciBvcyBncsOhZmljb3MgZGUgY29ycmVsYcOnw6NvIGUgaGVhdG1hcC4gDQoNCkEgbm9zc2EgdmFyacOhdmVsIFkgIFNhbGVQcmljZSwgb3Ugc2VqYSBhIHF1ZSBxdWVyZW1vcyBwcmV2ZXIsIMOpIHF1YWxpdGF0aXZhIGVudMOjbyB2YW1vcyBjb21lw6dhciBhIGFuw6FsaXNlIHBvciBlbGEgZSBzdWdlcmlyIHBvc3RlcmlvcmVzIGNvcnJlbGHDp8O1ZXMgY29tIGFzIG91dHJhcyB2YXJpw6F2ZWlzLiANCg0KVXNhcmVtb3MgbyBwYWNvdGUgW2UxMDcxXShodHRwczovL2NyYW4uci1wcm9qZWN0Lm9yZy93ZWIvcGFja2FnZXMvZTEwNzEvZTEwNzEucGRmKSBwYXJhIG9idGVybW9zIG9zIGdyw6FmaWNvcyBkZSBkZW5zaWRhZGUgZSBhbsOhbGlzYXJtb3MgYXMgYXNzaW1ldHJpYXMgZGEgZGlzdHJpYnVpw6fDo28gZGUgYWxndW1hcyBWYXJpw6F2ZWlzLg0KDQpgYGB7cn0NCiMgT2xoYW5kbyBvcyBub21lcyBkYXMgdmFyacOhdmVpcyBlc3NlIGJhbmNvZCBkZSBkYWRvcyBxdWFudGl0YXRpdm9zLg0KbmFtZXModHJhaW5fcXVhbnRpKQ0KDQojIEV4dHJhaW5kbyBvIHN1bcOhcmlvIGVzdGF0aXN0aWNvDQoNCnN1bW1hcnkodHJhaW5fcXVhbnRpKQ0KDQpsaWJyYXJ5KGUxMDcxKQ0KDQpwYXIobWZyb3c9YygzLDIpKSANCg0KIyBQcmXDp28gZG8gSW3Ds3ZlbA0KcGxvdChkZW5zaXR5KHRyYWluX3F1YW50aSRTYWxlUHJpY2UpLCBtYWluPSJEZW5zaXR5IFBsb3Q6IFNhbGUgUHJpY2UiLCB5bGFiPSJGcmVxdWVuY3kiLA0KICAgICBzdWI9cGFzdGUoIkFzc2ltZXRyaWE6Iiwgcm91bmQoZTEwNzE6OnNrZXduZXNzKHRyYWluX3F1YW50aSRTYWxlUHJpY2UpLCAyKSkpICANCnBvbHlnb24oZGVuc2l0eSh0cmFpbl9xdWFudGkkU2FsZVByaWNlKSwgY29sPSJncmVlbiIpDQoNCiMgTsO6bWVybyBkZSBxdWFydG9zDQpwbG90KGRlbnNpdHkodHJhaW5fcXVhbnRpJEJlZHJvb21BYnZHciksIG1haW49IkRlbnNpdHkgUGxvdDogQmVkcm9vbSIsIHlsYWI9IkZyZXF1ZW5jeSIsDQogICAgIHN1Yj1wYXN0ZSgiQXNzaW1ldHJpYToiLCByb3VuZChlMTA3MTo6c2tld25lc3ModHJhaW5fcXVhbnRpJEJlZHJvb21BYnZHciksIDIpKSkgIA0KcG9seWdvbihkZW5zaXR5KHRyYWluX3F1YW50aSRCZWRyb29tQWJ2R3IpLCBjb2w9Im9yYW5nZSIpDQoNCiMgTsO6bWVyb3MgZGUgQmFuaGVpcm9zDQpwbG90KGRlbnNpdHkodHJhaW5fcXVhbnRpJEZ1bGxCYXRoKSwgbWFpbj0iRGVuc2l0eSBQbG90OiBCYXRocm9vbSIsIHlsYWI9IkZyZXF1ZW5jeSIsDQogICAgIHN1Yj1wYXN0ZSgiQXNzaW1ldHJpYToiLCByb3VuZChlMTA3MTo6c2tld25lc3ModHJhaW5fcXVhbnRpJEZ1bGxCYXRoKSwgMikpKSAgDQpwb2x5Z29uKGRlbnNpdHkodHJhaW5fcXVhbnRpJEZ1bGxCYXRoKSwgY29sPSJncmVlbiIpDQoNCiMgw4FyZWEgZG8gdGVycmVubw0KcGxvdChkZW5zaXR5KHRyYWluX3F1YW50aSRMb3RBcmVhKSwgbWFpbj0iRGVuc2l0eSBQbG90OiBMb3QgQXJlYSBpbiBmZWV0IiwgeWxhYj0iRnJlcXVlbmN5IiwNCiAgICAgc3ViPXBhc3RlKCJBc3NpbWV0cmlhOiIsIHJvdW5kKGUxMDcxOjpza2V3bmVzcyh0cmFpbl9xdWFudGkkTG90QXJlYSksIDIpKSkgIA0KcG9seWdvbihkZW5zaXR5KHRyYWluX3F1YW50aSRMb3RBcmVhKSwgY29sPSJvcmFuZ2UiKQ0KDQojIEFubyBkZSBDb25zdHJ1w6fDo28NCnBsb3QoZGVuc2l0eSh0cmFpbl9xdWFudGkkWWVhckJ1aWx0KSwgbWFpbj0iRGVuc2l0eSBQbG90OiBZZWFyIEJ1aWx0IiwgeWxhYj0iRnJlcXVlbmN5IiwNCiAgICAgc3ViPXBhc3RlKCJBc3NpbWV0cmlhOiIsIHJvdW5kKGUxMDcxOjpza2V3bmVzcyh0cmFpbl9xdWFudGkkWWVhckJ1aWx0KSwgMikpKSAgDQpwb2x5Z29uKGRlbnNpdHkodHJhaW5fcXVhbnRpJFllYXJCdWlsdCksIGNvbD0iZ3JlZW4iKQ0KDQoNCiMgw4FyZWEgZGEgZ2FyYWdlbQ0KcGxvdChkZW5zaXR5KHRyYWluX3F1YW50aSRHYXJhZ2VBcmVhKSwgbWFpbj0iRGVuc2l0eSBQbG90OiBHYXJhZ2UgQXJlYSIsIHlsYWI9IkZyZXF1ZW5jeSIsDQogICAgIHN1Yj1wYXN0ZSgiQXNzaW1ldHJpYToiLCByb3VuZChlMTA3MTo6c2tld25lc3ModHJhaW5fcXVhbnRpJEdhcmFnZUFyZWEpLCAyKSkpICANCnBvbHlnb24oZGVuc2l0eSh0cmFpbl9xdWFudGkkR2FyYWdlQXJlYSksIGNvbD0ib3JhbmdlIikNCg0KDQpgYGANCg0KVmFtb3MgYW5hbGlzYXIgb3MgcHJlw6dvcyBkb3MgaW3Ds3ZlaXMgZGUgYWNvcmRvIGNvbSBvIG7Dum1lcm8gZGUgcXVhcnRvcywgbsO6bWVyb3MgZGUgYmFuaGVpcm9zIGUgdmFnYXMgZGUgZ2FyYWdlbS4gUGFyYSBpc3NvIHZhbW9zIHV0aWxpemFyIG8gYm94cGxvdCBlIHZlciBhIHZhcmlhw6fDo28gZG9zIHZhbG9yZXMgZSBzZSBvY29ycmVtIG91dGxpZXJzIHBhcmEgZXNzZXMgcGFyZXMsIGEgYmlibGlvdGVjYSBnZ3Bsb3QgYXV4aWxpYXLDoSBuZXNzYSBldGFwYS4gDQoNCg0KYGBge3J9DQpsaWJyYXJ5KHBsb3RseSkNCg0KbGlzdGE9YygiRnVsbEJhdGgiLCJCZWRyb29tQWJ2R3IiLCJHYXJhZ2VDYXJzIikNCmxlZ2VuZGE9YygiVmFsb3IgZG8gaW1vdmVsIHBlbGEgbnVtZXJvIGRlIGJhbmhlaXJvcyIsDQogICAgICAgICAgIlZhbG9yIGRvIGltb3ZlbCBwZWxvIG51bWVybyBkZSBxdWFydG9zIiwNCiAgICAgICAgICAiVmFsb3IgZG8gaW1vdmVsIHBlbG8gbnVtZXJvIGRlIHZhZ2FzIikNCg0KcGxvdC5ib3hlczwtZnVuY3Rpb24oWCxsZWdlbmRhKXsNCiAgZ2dwbG90bHkoDQogICAgZ2dwbG90KHRyYWluX3F1YW50aSxhZXNfc3RyaW5nKHg9WCwgeT0iU2FsZVByaWNlIixncm91cD1YLCBmaWxsID0gWCkpKw0KICAgICAgZ2VvbV9ib3hwbG90KCkgKyBsYWJzKHRpdGxlID0gbGVnZW5kYSkrDQogICAgdGhlbWVfY2xhc3NpYygpDQogICkNCiAgDQp9DQoNCk1hcChwbG90LmJveGVzLGxpc3RhLGxlZ2VuZGEpDQpgYGANCkRlIGZhdG8gZXNzYXMgdmFyacOhdmVpcyBzw6NvIGJlbSBpbnRlcmVzc2FudGUgcGFyYSBhbmFsaXNhcm1vcyBjb21vIGVsYXMgc2UgY29tcG9ydGFtIGNvbSBhIHZhcmlhdmVsIFkgU2FsZVByaWNlLiBObyBlbnRhbnRvIG8gbWFpcyBpbXBvcnRhbnRlIGFnb3JhIMOpIGludmVzdGlnYXJtb3MgYXMgY29ycmVsYcOnw7VlcyBlbnRyZSBhcyB2YXJpw6F2ZWlzLiAgUGFyYSBpc3NvIHZhbW9zIHV0aWxpemFyIGEgY29ycmVsYcOnw6NvIGRlIFBlYXJzb24gcXVlIGVuY29udHJhbW9zIG5vIHBhY290ZSBnZ2NvcnBsb3QuDQoNCmBgYHtyfQ0KbGlicmFyeShnZ2NvcnJwbG90KQ0KDQoNCmNvcnIgPC0gcm91bmQoY29yKHRyYWluX3F1YW50aSksIDEpDQoNCiMgUGxvdA0KDQpnZ3Bsb3RseSgNCiAgZ2djb3JycGxvdChjb3JyLA0KICAgICAgICAgICB0eXBlID0gImxvd2VyIiwNCiAgICAgICAgICAgbGFiID0gRiwgDQogICAgICAgICAgIGxhYl9zaXplID0gNSwgIA0KICAgICAgICAgICBjb2xvcnMgPSBjKCJ0b21hdG8yIiwgIndoaXRlIiwgInNwcmluZ2dyZWVuMyIpLA0KICAgICAgICAgICB0aXRsZT0iQ29ycmVsb2dyYW0gb2YgSG91c2UgUHJpY2VzIEthZ2dsZSBEYXRhc2V0IiwgDQogICAgICAgICAgIGdndGhlbWU9dGhlbWVfYncpDQopDQoNCmBgYA0KDQpBc3NpbSBmb2kgcG9zc2l2ZWwgb2JzZXJ2YXIgcXVlbSBwb3NzdWkgbWFpb3IgY29ycmVsYcOnw6NvIHBvc2l0aXZhIG91IG5lZ2F0aXZhIGNvbSBhIHZhcmnDoXZlbCBTYWxlIFByaWNlLiBWYW1vcyBhZ29yYSB0ZW50YXIgcm9kYSB1bWEgcmVncmVzc8OjbyBsaW5lYXIgcXVlIHBvc3NhIHNlIGNhcGF6IGRlIGV4cGxpY2FyIG8gY29tcG9ydGFtZW50byBkbyBwcmXDp28gZG8gaW3Ds3ZlbCBlbSByZWxhw6fDo28gYXMgb3V0cmFzIHZhcmnDoXZlaXMuIFBhcmEgaXNzbyB2YW1vcyBjcmlhciB1bWEgZnVuw6fDo28gDQoNCg0KYGBge3J9DQpscmdfaG91c2VfcHJpY2VzIDwtIGZ1bmN0aW9uKHNlcmllKXsNCiAgZ3JhZmljbyA8LSANCiAgICBnZ3Bsb3RseSgNCiAgICBnZ3Bsb3QodHJhaW5fcXVhbnRpLGFlcyh5PVNhbGVQcmljZSx4PXNlcmllKSkgKw0KICAgICAgICAgZ2VvbV9wb2ludCgpICsgDQogICAgICAgICAgeGxpbSgwLCBtYXgoc2VyaWUpKSArDQogICAgICAgICAgeWxpbSgwLCBtYXgodHJhaW5fcXVhbnRpJFNhbGVQcmljZSkpICsNCiAgICAgICAgICBnZW9tX3Ntb290aChmb3JtdWxhID0geSB+IHgsbWV0aG9kPSJsbSIpDQogICAgKQ0KICAgIHJldHVybihncmFmaWNvKQ0KfQ0KDQpscmdfaG91c2VfcHJpY2VzKHNlcmllID0gdHJhaW5fcXVhbnRpJFllYXJCdWlsdCkNCmxyZ19ob3VzZV9wcmljZXMoc2VyaWUgPSB0cmFpbl9xdWFudGkkWWVhclJlbW9kQWRkKQ0KbHJnX2hvdXNlX3ByaWNlcyhzZXJpZSA9IHRyYWluX3F1YW50aSRMb3RGcm9udGFnZSkNCmxyZ19ob3VzZV9wcmljZXMoc2VyaWUgPSB0cmFpbl9xdWFudGkkR3JMaXZBcmVhKQ0KbHJnX2hvdXNlX3ByaWNlcyhzZXJpZSA9IHRyYWluX3F1YW50aSRHYXJhZ2VDYXJzKQ0KbHJnX2hvdXNlX3ByaWNlcyhzZXJpZSA9IHRyYWluX3F1YW50aSRGdWxsQmF0aCkNCmxyZ19ob3VzZV9wcmljZXMoc2VyaWUgPSB0cmFpbl9xdWFudGkkVG90YWxCc210U0YpDQpscmdfaG91c2VfcHJpY2VzKHNlcmllID0gdHJhaW5fcXVhbnRpJEdhcmFnZUNhcnMpDQpscmdfaG91c2VfcHJpY2VzKHNlcmllID0gdHJhaW5fcXVhbnRpJFgxc3RGbHJTRikNCg0KDQoNCmBgYA0KDQo=